sched: rt throttling vs no_hz
Peter Zijlstra [Fri, 25 Jan 2008 20:08:31 +0000 (21:08 +0100)]
We need to teach no_hz about rt throttling because it is tick-driven.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

include/linux/sched.h
kernel/sched.c
kernel/sched_rt.c
kernel/time/tick-sched.c

index 04eecbf..acadcab 100644 (file)
@@ -230,6 +230,8 @@ static inline int select_nohz_load_balancer(int cpu)
 }
 #endif
 
+extern unsigned long rt_needs_cpu(int cpu);
+
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
index 5ea2c53..22712b2 100644 (file)
@@ -442,6 +442,7 @@ struct rq {
        struct cfs_rq cfs;
        struct rt_rq rt;
        u64 rt_period_expire;
+       int rt_throttled;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
        /* list of leaf cfs_rq on this cpu: */
@@ -594,6 +595,23 @@ static void update_rq_clock(struct rq *rq)
 #define task_rq(p)             cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
 
+unsigned long rt_needs_cpu(int cpu) /* ticks until cpu's rt period expires; 0 = no constraint */
+{
+       struct rq *rq = cpu_rq(cpu);
+       u64 delta;
+
+       if (!rq->rt_throttled) /* runqueue not rt-throttled: nothing to wait for */
+               return 0;
+
+       if (rq->clock > rq->rt_period_expire) /* period already over: ask for the very next tick */
+               return 1;
+
+       delta = rq->rt_period_expire - rq->clock; /* time left in the period, in rq->clock units */
+       do_div(delta, NSEC_PER_SEC / HZ); /* scale to ticks; presumes rq->clock counts nanoseconds */
+
+       return (unsigned long)delta; /* NOTE(review): 0 if under one tick remains; caller treats 0 as "no constraint" - confirm */
+}
+
 /*
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
@@ -7102,9 +7120,11 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
        /* delimiter for bitsearch: */
        __set_bit(MAX_RT_PRIO, array->bitmap);
 
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+       rt_rq->highest_prio = MAX_RT_PRIO;
+#endif
 #ifdef CONFIG_SMP
        rt_rq->rt_nr_migratory = 0;
-       rt_rq->highest_prio = MAX_RT_PRIO;
        rt_rq->overloaded = 0;
 #endif
 
@@ -7191,6 +7211,7 @@ void __init sched_init(void)
                                &per_cpu(init_sched_rt_entity, i), i, 1);
 #endif
                rq->rt_period_expire = 0;
+               rq->rt_throttled = 0;
 
                for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
                        rq->cpu_load[j] = 0;
index 1144bf5..8bfdb3f 100644 (file)
@@ -175,7 +175,11 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
        ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
 
        if (rt_rq->rt_time > ratio) {
+               struct rq *rq = rq_of_rt_rq(rt_rq);
+
+               rq->rt_throttled = 1;
                rt_rq->rt_throttled = 1;
+
                sched_rt_ratio_dequeue(rt_rq);
                return 1;
        }
@@ -183,18 +187,6 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
        return 0;
 }
 
-static void __update_sched_rt_period(struct rt_rq *rt_rq, u64 period)
-{
-       unsigned long rt_ratio = sched_rt_ratio(rt_rq);
-       u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
-       rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
-       if (rt_rq->rt_throttled) {
-               rt_rq->rt_throttled = 0;
-               sched_rt_ratio_enqueue(rt_rq);
-       }
-}
-
 static void update_sched_rt_period(struct rq *rq)
 {
        struct rt_rq *rt_rq;
@@ -204,8 +196,18 @@ static void update_sched_rt_period(struct rq *rq)
                period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
                rq->rt_period_expire += period;
 
-               for_each_leaf_rt_rq(rt_rq, rq)
-                       __update_sched_rt_period(rt_rq, period);
+               for_each_leaf_rt_rq(rt_rq, rq) {
+                       unsigned long rt_ratio = sched_rt_ratio(rt_rq);
+                       u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+                       rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
+                       if (rt_rq->rt_throttled) {
+                               rt_rq->rt_throttled = 0;
+                               sched_rt_ratio_enqueue(rt_rq);
+                       }
+               }
+
+               rq->rt_throttled = 0;
        }
 }
 
index cb89fa8..5f9fb64 100644 (file)
@@ -153,6 +153,7 @@ void tick_nohz_update_jiffies(void)
 void tick_nohz_stop_sched_tick(void)
 {
        unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
+       unsigned long rt_jiffies;
        struct tick_sched *ts;
        ktime_t last_update, expires, now, delta;
        struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
@@ -216,6 +217,10 @@ void tick_nohz_stop_sched_tick(void)
        next_jiffies = get_next_timer_interrupt(last_jiffies);
        delta_jiffies = next_jiffies - last_jiffies;
 
+       rt_jiffies = rt_needs_cpu(cpu);
+       if (rt_jiffies && rt_jiffies < delta_jiffies)
+               delta_jiffies = rt_jiffies;
+
        if (rcu_needs_cpu(cpu))
                delta_jiffies = 1;
        /*