sched: SCHED_OTHER vs SCHED_IDLE isolation
Peter Zijlstra [Thu, 15 Jan 2009 13:53:38 +0000 (14:53 +0100)]
Stronger SCHED_IDLE isolation:

 - no SCHED_IDLE buddies
 - never let SCHED_IDLE preempt on wakeup
 - always preempt SCHED_IDLE on wakeup
 - limit SLEEPER fairness for SCHED_IDLE

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
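
For context: a task enters SCHED_IDLE explicitly, via sched_setscheduler()
(or `chrt --idle 0 <cmd>` from the shell), and demoting yourself to
SCHED_IDLE needs no privileges. A minimal userspace sketch, not part of
the patch:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	/* SCHED_IDLE requires a static priority of 0. */
	struct sched_param sp = { .sched_priority = 0 };

	/* pid 0 means the calling thread. */
	if (sched_setscheduler(0, SCHED_IDLE, &sp)) {
		perror("sched_setscheduler");
		return 1;
	}
	/* From here on we only run when nothing else wants the CPU. */
	return 0;
}

The checks below are what keep such tasks from disturbing SCHED_OTHER
once they are there.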

kernel/sched_fair.c

index 8e1352c..cdebd80 100644
@@ -677,9 +677,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
                        unsigned long thresh = sysctl_sched_latency;
 
                        /*
-                        * convert the sleeper threshold into virtual time
+                        * Convert the sleeper threshold into virtual time.
+                        * SCHED_IDLE is a special sub-class.  We care about
+                        * fairness only relative to other SCHED_IDLE tasks,
+                        * all of which have the same weight.
                         */
-                       if (sched_feat(NORMALIZED_SLEEPER))
+                       if (sched_feat(NORMALIZED_SLEEPER) &&
+                                       task_of(se)->policy != SCHED_IDLE)
                                thresh = calc_delta_fair(thresh, se);
 
                        vruntime -= thresh;
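
calc_delta_fair() scales a wall-clock delta by NICE_0_LOAD/weight to turn
it into virtual time for the entity. Roughly, as a sketch of the idea
only (the real kernel code goes through calc_delta_mine() and its
inverse-multiply fast path):

/*
 * Sketch only: what calc_delta_fair() computes, ignoring the
 * 64-bit and overflow handling of the real calc_delta_mine().
 */
static unsigned long sketch_calc_delta_fair(unsigned long delta,
					    struct sched_entity *se)
{
	if (se->load.weight != NICE_0_LOAD)	/* NICE_0_LOAD == 1024 */
		delta = delta * NICE_0_LOAD / se->load.weight;

	return delta;
}

A SCHED_IDLE entity carries a tiny weight (WEIGHT_IDLEPRIO, 3 in this
era), so normalizing would inflate the sleeper threshold by roughly
1024/3, handing an idle task a ~340x vruntime bonus on every wakeup.
Skipping the normalization caps the credit at the plain
sysctl_sched_latency instead.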
@@ -1340,14 +1344,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 
 static void set_last_buddy(struct sched_entity *se)
 {
-       for_each_sched_entity(se)
-               cfs_rq_of(se)->last = se;
+       if (likely(task_of(se)->policy != SCHED_IDLE)) {
+               for_each_sched_entity(se)
+                       cfs_rq_of(se)->last = se;
+       }
 }
 
 static void set_next_buddy(struct sched_entity *se)
 {
-       for_each_sched_entity(se)
-               cfs_rq_of(se)->next = se;
+       if (likely(task_of(se)->policy != SCHED_IDLE)) {
+               for_each_sched_entity(se)
+                       cfs_rq_of(se)->next = se;
+       }
 }
 
 /*
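
The last/next buddies bias which entity pick_next_entity() selects. A
simplified sketch of that preference (the exact code varies across
kernel versions):

static struct sched_entity *sketch_pick_next(struct cfs_rq *cfs_rq)
{
	/* Default: the leftmost (smallest-vruntime) entity. */
	struct sched_entity *se = __pick_next_entity(cfs_rq);

	/* Prefer a cached buddy when that stays within the fairness bound. */
	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
		se = cfs_rq->next;

	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
		se = cfs_rq->last;

	return se;
}

By refusing to record SCHED_IDLE entities as buddies, the hunk above
ensures an idle task never jumps the queue ahead of the leftmost entity.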
@@ -1393,12 +1401,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
                return;
 
        /*
-        * Batch tasks do not preempt (their preemption is driven by
+        * Batch and idle tasks do not preempt (their preemption is driven by
         * the tick):
         */
-       if (unlikely(p->policy == SCHED_BATCH))
+       if (unlikely(p->policy != SCHED_NORMAL))
                return;
 
+       /* Idle tasks are by definition preempted by everybody. */
+       if (unlikely(curr->policy == SCHED_IDLE)) {
+               resched_task(curr);
+               return;
+       }
+
        if (!sched_feat(WAKEUP_PREEMPT))
                return;
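
Note the ordering in this last hunk: a SCHED_IDLE curr is rescheduled
before the WAKEUP_PREEMPT feature check, so SCHED_NORMAL wakers preempt
idle tasks even with that feature disabled. A quick way to observe the
isolation (a hypothetical demo, not part of the patch) is to pin two
spinners to one CPU, e.g. under `taskset -c 0`, and watch their shares
in top(1):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 0 };
	volatile unsigned long n = 0;

	if (fork() == 0) {
		/* Child: demote itself to SCHED_IDLE, then fall through. */
		if (sched_setscheduler(0, SCHED_IDLE, &sp))
			perror("sched_setscheduler");
	}
	/* Parent stays SCHED_OTHER; both burn the same CPU. */
	for (;;)
		n++;
}

With this patch applied, the SCHED_OTHER spinner should end up with
nearly the whole CPU, while the SCHED_IDLE child is confined to the
leftovers.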