sched: Fix and clean up rate-limit newidle code
Mike Galbraith [Tue, 10 Nov 2009 02:50:02 +0000 (03:50 +0100)]
Commit 1b9508f, "Rate-limit newidle" has been confirmed to fix
the netperf UDP loopback regression reported by Alex Shi.

This is a cleanup and a fix:

 - moved to a more out of the way spot

 - fix to ensure that balancing doesn't try to balance
   runqueues which haven't gone online yet, which can
   mess up CPU enumeration during boot.

Reported-by: Alex Shi <alex.shi@intel.com>
Reported-by: Zhang, Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@kernel.org> # .32.x: a1f84a3: sched: Check for an idle shared cache
Cc: <stable@kernel.org> # .32.x: 1b9508f: sched: Rate-limit newidle
Cc: <stable@kernel.org> # .32.x: fd21073: sched: Fix affinity logic
Cc: <stable@kernel.org> # .32.x
LKML-Reference: <1257821402.5648.17.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

kernel/sched.c

index 23e3535..ad37776 100644 (file)
@@ -2354,17 +2354,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
        if (rq != orig_rq)
                update_rq_clock(rq);
 
-       if (rq->idle_stamp) {
-               u64 delta = rq->clock - rq->idle_stamp;
-               u64 max = 2*sysctl_sched_migration_cost;
-
-               if (delta > max)
-                       rq->avg_idle = max;
-               else
-                       update_avg(&rq->avg_idle, delta);
-               rq->idle_stamp = 0;
-       }
-
        WARN_ON(p->state != TASK_WAKING);
        cpu = task_cpu(p);
 
@@ -2421,6 +2410,17 @@ out_running:
 #ifdef CONFIG_SMP
        if (p->sched_class->task_wake_up)
                p->sched_class->task_wake_up(rq, p);
+
+       if (unlikely(rq->idle_stamp)) {
+               u64 delta = rq->clock - rq->idle_stamp;
+               u64 max = 2*sysctl_sched_migration_cost;
+
+               if (delta > max)
+                       rq->avg_idle = max;
+               else
+                       update_avg(&rq->avg_idle, delta);
+               rq->idle_stamp = 0;
+       }
 #endif
 out:
        task_rq_unlock(rq, &flags);
@@ -4098,7 +4098,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
        unsigned long flags;
        struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-       cpumask_setall(cpus);
+       cpumask_copy(cpus, cpu_online_mask);
 
        /*
         * When power savings policy is enabled for the parent domain, idle
@@ -4261,7 +4261,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
        int all_pinned = 0;
        struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-       cpumask_setall(cpus);
+       cpumask_copy(cpus, cpu_online_mask);
 
        /*
         * When power savings policy is enabled for the parent domain, idle
@@ -9522,6 +9522,8 @@ void __init sched_init(void)
                rq->cpu = i;
                rq->online = 0;
                rq->migration_thread = NULL;
+               rq->idle_stamp = 0;
+               rq->avg_idle = 2*sysctl_sched_migration_cost;
                INIT_LIST_HEAD(&rq->migration_queue);
                rq_attach_root(rq, &def_root_domain);
 #endif