sched: Fix/clarify set_task_cpu() locking rules
Peter Zijlstra [Fri, 3 Jun 2011 15:37:07 +0000 (17:37 +0200)]
Sergey reported a CONFIG_PROVE_RCU warning in push_rt_task where
set_task_cpu() was called with both relevant rq->locks held, which
should be sufficient for running tasks since holding its rq->lock
will serialize against sched_move_task().

Update the comments and fix the task_group() lockdep test.

Reported-and-tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1307115427.2353.3456.camel@twins
Signed-off-by: Ingo Molnar <mingo@elte.hu>

kernel/sched.c

index 2fe98ed..3f2e502 100644 (file)
@@ -605,10 +605,10 @@ static inline int cpu_of(struct rq *rq)
 /*
  * Return the group to which this tasks belongs.
  *
- * We use task_subsys_state_check() and extend the RCU verification
- * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
- * holds that lock for each task it moves into the cgroup. Therefore
- * by holding that lock, we pin the task to the current cgroup.
+ * We use task_subsys_state_check() and extend the RCU verification with
+ * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
+ * task it moves into the cgroup. Therefore by holding either of those locks,
+ * we pin the task to the current cgroup.
  */
 static inline struct task_group *task_group(struct task_struct *p)
 {
@@ -616,7 +616,8 @@ static inline struct task_group *task_group(struct task_struct *p)
        struct cgroup_subsys_state *css;
 
        css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-                       lockdep_is_held(&p->pi_lock));
+                       lockdep_is_held(&p->pi_lock) ||
+                       lockdep_is_held(&task_rq(p)->lock));
        tg = container_of(css, struct task_group, css);
 
        return autogroup_task_group(p, tg);
@@ -2200,6 +2201,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                        !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
 
 #ifdef CONFIG_LOCKDEP
+       /*
+        * The caller should hold either p->pi_lock or rq->lock, when changing
+        * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks.
+        *
+        * sched_move_task() holds both and thus holding either pins the cgroup,
+        * see set_task_rq().
+        *
+        * Furthermore, all task_rq users should acquire both locks, see
+        * task_rq_lock().
+        */
        WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
                                      lockdep_is_held(&task_rq(p)->lock)));
 #endif