sched: Add new prio to cpupri before removing old prio
Steven Rostedt [Wed, 29 Jul 2009 04:21:23 +0000 (00:21 -0400)]
We need to add the new prio to the cpupri accounting before
removing the old prio. This is because removing the old prio
first will open a race window where the cpu will be removed
from pri_active. In this case the cpu will not be visible for
RT push and pulls. This could cause a RT task to not migrate
appropriately, and create a very large latency.

This bug was found with the use of ftrace sched events and
trace_printk.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090729042526.438281019@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

kernel/sched_cpupri.c

index d014efb..0f052fc 100644 (file)
@@ -127,21 +127,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
        /*
         * If the cpu was currently mapped to a different value, we
-        * first need to unmap the old value
+        * need to map it to the new value then remove the old value.
+        * Note, we must add the new value first, otherwise we risk the
+        * cpu being cleared from pri_active, and this cpu could be
+        * missed for a push or pull.
         */
-       if (likely(oldpri != CPUPRI_INVALID)) {
-               struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
-
-               spin_lock_irqsave(&vec->lock, flags);
-
-               vec->count--;
-               if (!vec->count)
-                       clear_bit(oldpri, cp->pri_active);
-               cpumask_clear_cpu(cpu, vec->mask);
-
-               spin_unlock_irqrestore(&vec->lock, flags);
-       }
-
        if (likely(newpri != CPUPRI_INVALID)) {
                struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
 
@@ -154,6 +144,18 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
                spin_unlock_irqrestore(&vec->lock, flags);
        }
+       if (likely(oldpri != CPUPRI_INVALID)) {
+               struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
+
+               spin_lock_irqsave(&vec->lock, flags);
+
+               vec->count--;
+               if (!vec->count)
+                       clear_bit(oldpri, cp->pri_active);
+               cpumask_clear_cpu(cpu, vec->mask);
+
+               spin_unlock_irqrestore(&vec->lock, flags);
+       }
 
        *currpri = newpri;
 }