Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...

[linux-3.10.git] / fs / proc / array.c
diff --git a/fs/proc/array.c b/fs/proc/array.c

index 7e4877d9dcb58b71720cb6cd79a690b3149d44bb..d66248a1919b3a3028d07ae1d4dc59f36ce57aea 100644 (file)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -68,7 +68,6 @@
  #include <linux/hugetlb.h>
  #include <linux/pagemap.h>
  #include <linux/swap.h>
-#include <linux/slab.h>
  #include <linux/smp.h>
  #include <linux/signal.h>
  #include <linux/highmem.h>
@@ -80,7 +79,9 @@
  #include <linux/delayacct.h>
  #include <linux/seq_file.h>
  #include <linux/pid_namespace.h>
+#include <linux/ptrace.h>
  #include <linux/tracehook.h>
+#include <linux/user_namespace.h>
  
  #include <asm/pgtable.h>
  #include <asm/processor.h>
@@ -95,7 +96,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
  
         get_task_comm(tcomm, p);
  
-       seq_printf(m, "Name:\t");
+       seq_puts(m, "Name:\t");
         end = m->buf + m->size;
         buf = m->buf + m->count;
         name = tcomm;
@@ -122,7 +123,7 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
                 buf++;
         }
         m->count = buf - m->buf;
-       seq_printf(m, "\n");
+       seq_putc(m, '\n');
  }
  
  /*
@@ -131,20 +132,25 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
   * you can test for combinations of others with
   * simple bit tests.
   */
-static const char *task_state_array[] = {
-       "R (running)",          /*  0 */
-       "S (sleeping)",         /*  1 */
-       "D (disk sleep)",       /*  2 */
-       "T (stopped)",          /*  4 */
-       "T (tracing stop)",     /*  8 */
-       "Z (zombie)",           /* 16 */
-       "X (dead)"              /* 32 */
+static const char * const task_state_array[] = {
+       "R (running)",          /*   0 */
+       "S (sleeping)",         /*   1 */
+       "D (disk sleep)",       /*   2 */
+       "T (stopped)",          /*   4 */
+       "t (tracing stop)",     /*   8 */
+       "Z (zombie)",           /*  16 */
+       "X (dead)",             /*  32 */
+       "x (dead)",             /*  64 */
+       "K (wakekill)",         /* 128 */
+       "W (waking)",           /* 256 */
  };
  
  static inline const char *get_task_state(struct task_struct *tsk)
  {
         unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
-       const char **p = &task_state_array[0];
+       const char * const *p = &task_state_array[0];
+
+       BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
  
         while (state) {
                 p++;
@@ -156,6 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
  static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
                                 struct pid *pid, struct task_struct *p)
  {
+       struct user_namespace *user_ns = seq_user_ns(m);
         struct group_info *group_info;
         int g;
         struct fdtable *fdt = NULL;
@@ -167,11 +174,11 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
                 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
         tpid = 0;
         if (pid_alive(p)) {
-               struct task_struct *tracer = tracehook_tracer_task(p);
+               struct task_struct *tracer = ptrace_parent(p);
                 if (tracer)
                         tpid = task_pid_nr_ns(tracer, ns);
         }
-       cred = get_cred((struct cred *) __task_cred(p));
+       cred = get_task_cred(p);
         seq_printf(m,
                 "State:\t%s\n"
                 "Tgid:\t%d\n"
@@ -184,8 +191,14 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
                 task_tgid_nr_ns(p, ns),
                 pid_nr_ns(pid, ns),
                 ppid, tpid,
-               cred->uid, cred->euid, cred->suid, cred->fsuid,
-               cred->gid, cred->egid, cred->sgid, cred->fsgid);
+               from_kuid_munged(user_ns, cred->uid),
+               from_kuid_munged(user_ns, cred->euid),
+               from_kuid_munged(user_ns, cred->suid),
+               from_kuid_munged(user_ns, cred->fsuid),
+               from_kgid_munged(user_ns, cred->gid),
+               from_kgid_munged(user_ns, cred->egid),
+               from_kgid_munged(user_ns, cred->sgid),
+               from_kgid_munged(user_ns, cred->fsgid));
  
         task_lock(p);
         if (p->files)
@@ -200,10 +213,11 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
         task_unlock(p);
  
         for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++)
-               seq_printf(m, "%d ", GROUP_AT(group_info, g));
+               seq_printf(m, "%d ",
+                          from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
         put_cred(cred);
  
-       seq_printf(m, "\n");
+       seq_putc(m, '\n');
  }
  
  static void render_sigset_t(struct seq_file *m, const char *header,
@@ -211,7 +225,7 @@ static void render_sigset_t(struct seq_file *m, const char *header,
  {
         int i;
  
-       seq_printf(m, "%s", header);
+       seq_puts(m, header);
  
         i = _NSIG;
         do {
@@ -225,7 +239,7 @@ static void render_sigset_t(struct seq_file *m, const char *header,
                 seq_printf(m, "%x", x);
         } while (i >= 4);
  
-       seq_printf(m, "\n");
+       seq_putc(m, '\n');
  }
  
  static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
@@ -262,9 +276,11 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
                 shpending = p->signal->shared_pending.signal;
                 blocked = p->blocked;
                 collect_sigign_sigcatch(p, &ignored, &caught);
-               num_threads = atomic_read(&p->signal->count);
+               num_threads = get_nr_threads(p);
+               rcu_read_lock();  /* FIXME: is this correct? */
                 qsize = atomic_read(&__task_cred(p)->user->sigpending);
-               qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
+               rcu_read_unlock();
+               qlim = task_rlimit(p, RLIMIT_SIGPENDING);
                 unlock_task_sighand(p, &flags);
         }
  
@@ -284,12 +300,12 @@ static void render_cap_t(struct seq_file *m, const char *header,
  {
         unsigned __capi;
  
-       seq_printf(m, "%s", header);
+       seq_puts(m, header);
         CAP_FOR_EACH_U32(__capi) {
                 seq_printf(m, "%08x",
                            a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
         }
-       seq_printf(m, "\n");
+       seq_putc(m, '\n');
  }
  
  static inline void task_cap(struct seq_file *m, struct task_struct *p)
@@ -320,6 +336,16 @@ static inline void task_context_switch_counts(struct seq_file *m,
                         p->nivcsw);
  }
  
+/*
+ * Write the task's cpus_allowed mask to @m twice: once after the
+ * "Cpus_allowed:" label through seq_cpumask(), and once after the
+ * "Cpus_allowed_list:" label through seq_cpumask_list().  Each of the
+ * two lines is terminated with '\n'.
+ */
+static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
+{
+       const struct cpumask *allowed = &task->cpus_allowed;
+
+       seq_puts(m, "Cpus_allowed:\t");
+       seq_cpumask(m, allowed);
+       seq_putc(m, '\n');
+
+       seq_puts(m, "Cpus_allowed_list:\t");
+       seq_cpumask_list(m, allowed);
+       seq_putc(m, '\n');
+}
+
  int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
                         struct pid *pid, struct task_struct *task)
  {
@@ -334,10 +360,8 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
         }
         task_sig(m, task);
         task_cap(m, task);
+       task_cpus_allowed(m, task);
         cpuset_task_status_allowed(m, task);
-#if defined(CONFIG_S390)
-       task_show_regs(m, task);
-#endif
         task_context_switch_counts(m, task);
         return 0;
  }
@@ -346,12 +370,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                         struct pid *pid, struct task_struct *task, int whole)
  {
         unsigned long vsize, eip, esp, wchan = ~0UL;
-       long priority, nice;
+       int priority, nice;
         int tty_pgrp = -1, tty_nr = 0;
         sigset_t sigign, sigcatch;
         char state;
         pid_t ppid = 0, pgid = -1, sid = -1;
         int num_threads = 0;
+       int permitted;
         struct mm_struct *mm;
         unsigned long long start_time;
         unsigned long cmin_flt = 0, cmaj_flt = 0;
@@ -364,19 +389,22 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
  
         state = *get_task_state(task);
         vsize = eip = esp = 0;
+       permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT);
         mm = get_task_mm(task);
         if (mm) {
                 vsize = task_vsize(mm);
-               eip = KSTK_EIP(task);
-               esp = KSTK_ESP(task);
+               if (permitted) {
+                       eip = KSTK_EIP(task);
+                       esp = KSTK_ESP(task);
+               }
         }
  
         get_task_comm(tcomm, task);
  
         sigemptyset(&sigign);
         sigemptyset(&sigcatch);
-       cutime = cstime = utime = stime = cputime_zero;
-       cgtime = gtime = cputime_zero;
+       cutime = cstime = utime = stime = 0;
+       cgtime = gtime = 0;
  
         if (lock_task_sighand(task, &flags)) {
                 struct signal_struct *sig = task->signal;
@@ -388,7 +416,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                         tty_nr = new_encode_dev(tty_devnum(sig->tty));
                 }
  
-               num_threads = atomic_read(&sig->count);
+               num_threads = get_nr_threads(task);
                 collect_sigign_sigcatch(task, &sigign, &sigcatch);
  
                 cmin_flt = sig->cmin_flt;
@@ -396,25 +424,22 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                 cutime = sig->cutime;
                 cstime = sig->cstime;
                 cgtime = sig->cgtime;
-               rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
+               rsslim = ACCESS_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
  
                 /* add up live thread stats at the group level */
                 if (whole) {
-                       struct task_cputime cputime;
                         struct task_struct *t = task;
                         do {
                                 min_flt += t->min_flt;
                                 maj_flt += t->maj_flt;
-                               gtime = cputime_add(gtime, task_gtime(t));
+                               gtime += t->gtime;
                                 t = next_thread(t);
                         } while (t != task);
  
                         min_flt += sig->min_flt;
                         maj_flt += sig->maj_flt;
-                       thread_group_cputime(task, &cputime);
-                       utime = cputime.utime;
-                       stime = cputime.stime;
-                       gtime = cputime_add(gtime, sig->gtime);
+                       thread_group_cputime_adjusted(task, &utime, &stime);
+                       gtime += sig->gtime;
                 }
  
                 sid = task_session_nr_ns(task, ns);
@@ -424,14 +449,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                 unlock_task_sighand(task, &flags);
         }
  
-       if (!whole || num_threads < 2)
+       if (permitted && (!whole || num_threads < 2))
                 wchan = get_wchan(task);
         if (!whole) {
                 min_flt = task->min_flt;
                 maj_flt = task->maj_flt;
-               utime = task_utime(task);
-               stime = task_stime(task);
-               gtime = task_gtime(task);
+               task_cputime_adjusted(task, &utime, &stime);
+               gtime = task->gtime;
         }
  
         /* scale priority and nice values from timeslices to -20..20 */
@@ -447,56 +471,70 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
         /* convert nsec -> ticks */
         start_time = nsec_to_clock_t(start_time);
  
-       seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
-%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
-               pid_nr_ns(pid, ns),
-               tcomm,
-               state,
-               ppid,
-               pgid,
-               sid,
-               tty_nr,
-               tty_pgrp,
-               task->flags,
-               min_flt,
-               cmin_flt,
-               maj_flt,
-               cmaj_flt,
-               cputime_to_clock_t(utime),
-               cputime_to_clock_t(stime),
-               cputime_to_clock_t(cutime),
-               cputime_to_clock_t(cstime),
-               priority,
-               nice,
-               num_threads,
-               start_time,
-               vsize,
-               mm ? get_mm_rss(mm) : 0,
-               rsslim,
-               mm ? mm->start_code : 0,
-               mm ? mm->end_code : 0,
-               mm ? mm->start_stack : 0,
-               esp,
-               eip,
-               /* The signal information here is obsolete.
-                * It must be decimal for Linux 2.0 compatibility.
-                * Use /proc/#/status for real-time signals.
-                */
-               task->pending.signal.sig[0] & 0x7fffffffUL,
-               task->blocked.sig[0] & 0x7fffffffUL,
-               sigign      .sig[0] & 0x7fffffffUL,
-               sigcatch    .sig[0] & 0x7fffffffUL,
-               wchan,
-               0UL,
-               0UL,
-               task->exit_signal,
-               task_cpu(task),
-               task->rt_priority,
-               task->policy,
-               (unsigned long long)delayacct_blkio_ticks(task),
-               cputime_to_clock_t(gtime),
-               cputime_to_clock_t(cgtime));
+       seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
+       seq_put_decimal_ll(m, ' ', ppid);
+       seq_put_decimal_ll(m, ' ', pgid);
+       seq_put_decimal_ll(m, ' ', sid);
+       seq_put_decimal_ll(m, ' ', tty_nr);
+       seq_put_decimal_ll(m, ' ', tty_pgrp);
+       seq_put_decimal_ull(m, ' ', task->flags);
+       seq_put_decimal_ull(m, ' ', min_flt);
+       seq_put_decimal_ull(m, ' ', cmin_flt);
+       seq_put_decimal_ull(m, ' ', maj_flt);
+       seq_put_decimal_ull(m, ' ', cmaj_flt);
+       seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime));
+       seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime));
+       seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime));
+       seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime));
+       seq_put_decimal_ll(m, ' ', priority);
+       seq_put_decimal_ll(m, ' ', nice);
+       seq_put_decimal_ll(m, ' ', num_threads);
+       seq_put_decimal_ull(m, ' ', 0);
+       seq_put_decimal_ull(m, ' ', start_time);
+       seq_put_decimal_ull(m, ' ', vsize);
+       seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0);
+       seq_put_decimal_ull(m, ' ', rsslim);
+       seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0);
+       seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0);
+       seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0);
+       seq_put_decimal_ull(m, ' ', esp);
+       seq_put_decimal_ull(m, ' ', eip);
+       /* The signal information here is obsolete.
+        * It must be decimal for Linux 2.0 compatibility.
+        * Use /proc/#/status for real-time signals.
+        */
+       seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL);
+       seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
+       seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
+       seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
+       seq_put_decimal_ull(m, ' ', wchan);
+       seq_put_decimal_ull(m, ' ', 0);
+       seq_put_decimal_ull(m, ' ', 0);
+       seq_put_decimal_ll(m, ' ', task->exit_signal);
+       seq_put_decimal_ll(m, ' ', task_cpu(task));
+       seq_put_decimal_ull(m, ' ', task->rt_priority);
+       seq_put_decimal_ull(m, ' ', task->policy);
+       seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task));
+       seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime));
+       seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime));
+
+       if (mm && permitted) {
+               seq_put_decimal_ull(m, ' ', mm->start_data);
+               seq_put_decimal_ull(m, ' ', mm->end_data);
+               seq_put_decimal_ull(m, ' ', mm->start_brk);
+               seq_put_decimal_ull(m, ' ', mm->arg_start);
+               seq_put_decimal_ull(m, ' ', mm->arg_end);
+               seq_put_decimal_ull(m, ' ', mm->env_start);
+               seq_put_decimal_ull(m, ' ', mm->env_end);
+       } else
+               seq_printf(m, " 0 0 0 0 0 0 0");
+
+       if (permitted)
+               seq_put_decimal_ll(m, ' ', task->exit_code);
+       else
+               seq_put_decimal_ll(m, ' ', 0);
+
+       seq_putc(m, '\n');
         if (mm)
                 mmput(mm);
         return 0;
@@ -517,15 +555,150 @@ int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
  int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
                         struct pid *pid, struct task_struct *task)
  {
-       int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0;
+       unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0;
         struct mm_struct *mm = get_task_mm(task);
  
         if (mm) {
                 size = task_statm(mm, &shared, &text, &data, &resident);
                 mmput(mm);
         }
-       seq_printf(m, "%d %d %d %d %d %d %d\n",
-                       size, resident, shared, text, lib, data, 0);
+       /*
+        * For fast reads, the fields are emitted one at a time with
+        * seq_put_decimal_ull() instead of formatting the whole line
+        * at once.  The first field is written with delimiter 0 and
+        * every later field with ' '; the fifth and seventh fields
+        * are the constant 0.  All counters stay 0 when the task has
+        * no mm.  The expected format is:
+        * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
+        *               size, resident, shared, text, data);
+        */
+       seq_put_decimal_ull(m, 0, size);
+       seq_put_decimal_ull(m, ' ', resident);
+       seq_put_decimal_ull(m, ' ', shared);
+       seq_put_decimal_ull(m, ' ', text);
+       seq_put_decimal_ull(m, ' ', 0);
+       seq_put_decimal_ull(m, ' ', data);
+       seq_put_decimal_ull(m, ' ', 0);
+       seq_putc(m, '\n');
  
         return 0;
  }
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+/*
+ * Find one child of the task that @inode refers to and return its
+ * struct pid, or NULL when there is nothing to return.
+ *
+ * @inode:    passed to proc_pid() to identify the parent task
+ * @pid_prev: pid handed out by the previous lookup, or NULL; when set,
+ *            the fast path tries to step to the sibling that follows it
+ * @pos:      zero-based index into the parent's children list, used
+ *            only by the slow path
+ *
+ * The whole lookup runs under read_lock(&tasklist_lock).  A non-NULL
+ * result has been passed through get_pid(); the seq_file callbacks
+ * that consume it call put_pid() on it.
+ */
+static struct pid *
+get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos)
+{
+       struct task_struct *start, *task;
+       struct pid *pid = NULL;
+
+       read_lock(&tasklist_lock);
+
+       /* No task behind the inode's pid any more: return NULL. */
+       start = pid_task(proc_pid(inode), PIDTYPE_PID);
+       if (!start)
+               goto out;
+
+       /*
+        * Let's try to continue searching first; this gives
+        * us a significant speedup on children-rich processes.
+        */
+       if (pid_prev) {
+               task = pid_task(pid_prev, PIDTYPE_PID);
+               /*
+                * Only trust the hint if the previous task still exists,
+                * is still a child of start and is still linked on a
+                * sibling list.
+                */
+               if (task && task->real_parent == start &&
+                   !(list_empty(&task->sibling))) {
+                       /* Previous child was the last one: pid stays NULL. */
+                       if (list_is_last(&task->sibling, &start->children))
+                               goto out;
+                       /* The entry linked right after task->sibling. */
+                       task = list_first_entry(&task->sibling,
+                                               struct task_struct, sibling);
+                       pid = get_pid(task_pid(task));
+                       goto out;
+               }
+       }
+
+       /*
+        * Slow search case.
+        *
+        * We might miss some children here if children
+        * are exited while we were not holding the lock,
+        * but it was never promised to be accurate that
+        * much.
+        *
+        * "Just suppose that the parent sleeps, but N children
+        *  exit after we printed their tids. Now the slow paths
+        *  skips N extra children, we miss N tasks." (c)
+        *
+        * So one needs to stop or freeze the leader and all
+        * its children to get a precise result.
+        */
+       list_for_each_entry(task, &start->children, sibling) {
+               /* Count down to the pos-th entry of the children list. */
+               if (pos-- == 0) {
+                       pid = get_pid(task_pid(task));
+                       break;
+               }
+       }
+
+out:
+       read_unlock(&tasklist_lock);
+       return pid;
+}
+
+/*
+ * seq_file ->show callback: print the number of the pid in @v, as
+ * translated by pid_nr_ns() for the namespace kept in the inode's
+ * superblock s_fs_info, followed by a single space.  Returns whatever
+ * seq_printf() returns.
+ */
+static int children_seq_show(struct seq_file *seq, void *v)
+{
+       struct inode *proc_inode = seq->private;
+
+       return seq_printf(seq, "%d ",
+                         pid_nr_ns(v, proc_inode->i_sb->s_fs_info));
+}
+
+/*
+ * seq_file ->start callback: look up the child at index *pos.  No
+ * previous pid is supplied (NULL), so get_children_pid() skips its
+ * "continue from the previous child" shortcut and walks the list.
+ * Returns the struct pid found, or NULL.
+ */
+static void *children_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return get_children_pid(seq->private, NULL, *pos);
+}
+
+/*
+ * seq_file ->next callback: fetch the child for index *pos + 1, passing
+ * the current element @v to get_children_pid() as the previous pid, then
+ * put_pid() @v and advance *pos.  Returns the next struct pid, or NULL.
+ */
+static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct pid *pid;
+
+       /* Look up the successor before @v is released below. */
+       pid = get_children_pid(seq->private, v, *pos + 1);
+       put_pid(v);
+
+       ++*pos;
+       return pid;
+}
+
+/*
+ * seq_file ->stop callback: put_pid() the element @v that the iteration
+ * stopped on.
+ */
+static void children_seq_stop(struct seq_file *seq, void *v)
+{
+       put_pid(v);
+}
+
+/* Iterator callbacks that children_seq_open() hands to seq_open(). */
+static const struct seq_operations children_seq_ops = {
+       .start  = children_seq_start,
+       .next   = children_seq_next,
+       .stop   = children_seq_stop,
+       .show   = children_seq_show,
+};
+
+/*
+ * ->open handler: set up a seq_file driven by children_seq_ops and, if
+ * that succeeds, store @inode in the seq_file's ->private field, where
+ * the iterator callbacks read it back.  Returns seq_open()'s result.
+ */
+static int children_seq_open(struct inode *inode, struct file *file)
+{
+       int err = seq_open(file, &children_seq_ops);
+
+       if (!err) {
+               struct seq_file *sf = file->private_data;
+
+               sf->private = inode;
+       }
+
+       return err;
+}
+
+/*
+ * ->release handler: tear down the seq_file set up by
+ * children_seq_open() and report seq_release()'s result to the caller
+ * instead of discarding it and returning a hard-coded 0.
+ *
+ * NOTE(review): this function is not declared static although it is
+ * only referenced from proc_tid_children_operations here; confirm that
+ * nothing outside this file needs the symbol before changing linkage.
+ */
+int children_seq_release(struct inode *inode, struct file *file)
+{
+       return seq_release(inode, file);
+}
+
+/*
+ * File operations built on seq_file: children_seq_open() installs
+ * children_seq_ops, reads and seeks go through seq_read()/seq_lseek(),
+ * and children_seq_release() releases the seq_file.
+ */
+const struct file_operations proc_tid_children_operations = {
+       .open    = children_seq_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = children_seq_release,
+};
+#endif /* CONFIG_CHECKPOINT_RESTORE */