perfcounters: pull inherited counters
Ingo Molnar [Fri, 19 Dec 2008 09:20:42 +0000 (10:20 +0100)]
Change counter inheritance from a 'push' to a 'pull' model: instead of
child tasks pushing their final counts to the parent, reuse the wait4
infrastructure to pull counters as child tasks are exit-processed,
much like how cutime/cstime is collected.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

include/linux/init_task.h
kernel/exit.c

index 23fd890..54fa2fa 100644 (file)
@@ -113,6 +113,14 @@ extern struct group_info init_groups;
 # define CAP_INIT_BSET  CAP_INIT_EFF_SET
 #endif
 
+#ifdef CONFIG_PERF_COUNTERS
+# define INIT_PERF_COUNTERS(tsk)                                       \
+       .perf_counter_ctx.counter_list =                                \
+               LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list),
+#else
+# define INIT_PERF_COUNTERS(tsk)
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -180,6 +188,7 @@ extern struct group_info init_groups;
        INIT_IDS                                                        \
        INIT_TRACE_IRQFLAGS                                             \
        INIT_LOCKDEP                                                    \
+       INIT_PERF_COUNTERS(tsk)                                         \
 }
 
 
index 244edfd..101b7ee 100644 (file)
@@ -153,6 +153,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 {
        struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
+#ifdef CONFIG_PERF_COUNTERS
+       WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list));
+#endif
        trace_sched_process_free(tsk);
        put_task_struct(tsk);
 }
@@ -922,12 +925,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
        forget_original_parent(tsk);
        exit_task_namespaces(tsk);
 
-       /*
-        * Flush inherited counters to the parent - before the parent
-        * gets woken up by child-exit notifications.
-        */
-       perf_counter_exit_task(tsk);
-
        write_lock_irq(&tasklist_lock);
        if (group_dead)
                kill_orphaned_pgrp(tsk->group_leader, NULL);
@@ -1122,12 +1119,6 @@ NORET_TYPE void do_exit(long code)
        if (tsk->splice_pipe)
                __free_pipe_info(tsk->splice_pipe);
 
-       /*
-        * These must happen late, after the PID is not
-        * hashed anymore, but still at a point that may sleep:
-        */
-       perf_counter_exit_task(tsk);
-
        preempt_disable();
        /* causes final put_task_struct in finish_task_switch(). */
        tsk->state = TASK_DEAD;
@@ -1371,6 +1362,12 @@ static int wait_task_zombie(struct task_struct *p, int options,
         */
        read_unlock(&tasklist_lock);
 
+       /*
+        * Flush inherited counters to the parent - before the parent
+        * gets woken up by child-exit notifications.
+        */
+       perf_counter_exit_task(p);
+
        retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
        status = (p->signal->flags & SIGNAL_GROUP_EXIT)
                ? p->signal->group_exit_code : p->exit_code;