sched: remove wait_runtime limit
Ingo Molnar [Mon, 15 Oct 2007 15:00:06 +0000 (17:00 +0200)]
remove the wait_runtime-limit fields and the code depending on them, now
that the math has been changed over to rely on the vruntime metric.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

include/linux/sched.h
kernel/sched.c
kernel/sched_debug.c
kernel/sched_fair.c
kernel/sysctl.c

index 5e5c457..353630d 100644 (file)
@@ -905,7 +905,6 @@ struct sched_entity {
        u64                     vruntime;
        u64                     prev_sum_exec_runtime;
        u64                     wait_start_fair;
-       u64                     sleep_start_fair;
 
 #ifdef CONFIG_SCHEDSTATS
        u64                     wait_start;
index 3a4ac0b..21cc3b2 100644 (file)
@@ -180,7 +180,6 @@ struct cfs_rq {
        u64 exec_clock;
        u64 min_vruntime;
        s64 wait_runtime;
-       u64 sleeper_bonus;
        unsigned long wait_runtime_overruns, wait_runtime_underruns;
 
        struct rb_root tasks_timeline;
@@ -673,19 +672,6 @@ static inline void resched_task(struct task_struct *p)
 }
 #endif
 
-static u64 div64_likely32(u64 divident, unsigned long divisor)
-{
-#if BITS_PER_LONG == 32
-       if (likely(divident <= 0xffffffffULL))
-               return (u32)divident / divisor;
-       do_div(divident, divisor);
-
-       return divident;
-#else
-       return divident / divisor;
-#endif
-}
-
 #if BITS_PER_LONG == 32
 # define WMULT_CONST   (~0UL)
 #else
@@ -1016,8 +1002,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
        if (p->se.wait_start_fair)
                p->se.wait_start_fair -= fair_clock_offset;
-       if (p->se.sleep_start_fair)
-               p->se.sleep_start_fair -= fair_clock_offset;
 
 #ifdef CONFIG_SCHEDSTATS
        if (p->se.wait_start)
@@ -1592,7 +1576,6 @@ static void __sched_fork(struct task_struct *p)
        p->se.sum_exec_runtime          = 0;
        p->se.prev_sum_exec_runtime     = 0;
        p->se.wait_runtime              = 0;
-       p->se.sleep_start_fair          = 0;
 
 #ifdef CONFIG_SCHEDSTATS
        p->se.wait_start                = 0;
@@ -6582,7 +6565,6 @@ void normalize_rt_tasks(void)
                p->se.wait_runtime              = 0;
                p->se.exec_start                = 0;
                p->se.wait_start_fair           = 0;
-               p->se.sleep_start_fair          = 0;
 #ifdef CONFIG_SCHEDSTATS
                p->se.wait_start                = 0;
                p->se.sleep_start               = 0;
index 62965f0..3350169 100644 (file)
@@ -148,7 +148,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
        P(wait_runtime);
        P(wait_runtime_overruns);
        P(wait_runtime_underruns);
-       P(sleeper_bonus);
 #undef P
 
        print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
@@ -272,7 +271,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
        P(se.wait_runtime);
        P(se.wait_start_fair);
        P(se.exec_start);
-       P(se.sleep_start_fair);
        P(se.vruntime);
        P(se.sum_exec_runtime);
 
index 72f202a..a94189c 100644 (file)
@@ -249,41 +249,11 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
        return period;
 }
 
-static inline void
-limit_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-       long limit = sysctl_sched_runtime_limit;
-
-       /*
-        * Niced tasks have the same history dynamic range as
-        * non-niced tasks:
-        */
-       if (unlikely(se->wait_runtime > limit)) {
-               se->wait_runtime = limit;
-               schedstat_inc(se, wait_runtime_overruns);
-               schedstat_inc(cfs_rq, wait_runtime_overruns);
-       }
-       if (unlikely(se->wait_runtime < -limit)) {
-               se->wait_runtime = -limit;
-               schedstat_inc(se, wait_runtime_underruns);
-               schedstat_inc(cfs_rq, wait_runtime_underruns);
-       }
-}
-
-static inline void
-__add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
-{
-       se->wait_runtime += delta;
-       schedstat_add(se, sum_wait_runtime, delta);
-       limit_wait_runtime(cfs_rq, se);
-}
-
 static void
 add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
 {
-       schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
-       __add_wait_runtime(cfs_rq, se, delta);
-       schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
+       se->wait_runtime += delta;
+       schedstat_add(cfs_rq, wait_runtime, delta);
 }
 
 /*
@@ -294,7 +264,7 @@ static inline void
 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
              unsigned long delta_exec)
 {
-       unsigned long delta, delta_fair, delta_mine, delta_exec_weighted;
+       unsigned long delta_fair, delta_mine, delta_exec_weighted;
        struct load_weight *lw = &cfs_rq->load;
        unsigned long load = lw->weight;
 
@@ -318,14 +288,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
        delta_fair = calc_delta_fair(delta_exec, lw);
        delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
 
-       if (cfs_rq->sleeper_bonus > sysctl_sched_min_granularity) {
-               delta = min((u64)delta_mine, cfs_rq->sleeper_bonus);
-               delta = min(delta, (unsigned long)(
-                       (long)sysctl_sched_runtime_limit - curr->wait_runtime));
-               cfs_rq->sleeper_bonus -= delta;
-               delta_mine -= delta;
-       }
-
        cfs_rq->fair_clock += delta_fair;
        /*
         * We executed delta_exec amount of time on the CPU,
@@ -461,58 +423,8 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se,
-                             unsigned long delta_fair)
-{
-       unsigned long load = cfs_rq->load.weight;
-       long prev_runtime;
-
-       /*
-        * Do not boost sleepers if there's too much bonus 'in flight'
-        * already:
-        */
-       if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
-               return;
-
-       if (sched_feat(SLEEPER_LOAD_AVG))
-               load = rq_of(cfs_rq)->cpu_load[2];
-
-       /*
-        * Fix up delta_fair with the effect of us running
-        * during the whole sleep period:
-        */
-       if (sched_feat(SLEEPER_AVG))
-               delta_fair = div64_likely32((u64)delta_fair * load,
-                                               load + se->load.weight);
-
-       delta_fair = calc_weighted(delta_fair, se);
-
-       prev_runtime = se->wait_runtime;
-       __add_wait_runtime(cfs_rq, se, delta_fair);
-       delta_fair = se->wait_runtime - prev_runtime;
-
-       /*
-        * Track the amount of bonus we've given to sleepers:
-        */
-       cfs_rq->sleeper_bonus += delta_fair;
-}
-
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-       struct task_struct *tsk = task_of(se);
-       unsigned long delta_fair;
-
-       if ((entity_is_task(se) && tsk->policy == SCHED_BATCH) ||
-                        !sched_feat(FAIR_SLEEPERS))
-               return;
-
-       delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
-               (u64)(cfs_rq->fair_clock - se->sleep_start_fair));
-
-       __enqueue_sleeper(cfs_rq, se, delta_fair);
-
-       se->sleep_start_fair = 0;
-
 #ifdef CONFIG_SCHEDSTATS
        if (se->sleep_start) {
                u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
@@ -544,6 +456,8 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                 * time that the task spent sleeping:
                 */
                if (unlikely(prof_on == SLEEP_PROFILING)) {
+                       struct task_struct *tsk = task_of(se);
+
                        profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
                                     delta >> 20);
                }
@@ -604,7 +518,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
        update_stats_dequeue(cfs_rq, se);
        if (sleep) {
-               se->sleep_start_fair = cfs_rq->fair_clock;
 #ifdef CONFIG_SCHEDSTATS
                if (entity_is_task(se)) {
                        struct task_struct *tsk = task_of(se);
index 9b1b0d4..97b15c2 100644 (file)
@@ -266,17 +266,6 @@ static ctl_table kern_table[] = {
        },
        {
                .ctl_name       = CTL_UNNUMBERED,
-               .procname       = "sched_runtime_limit_ns",
-               .data           = &sysctl_sched_runtime_limit,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec_minmax,
-               .strategy       = &sysctl_intvec,
-               .extra1         = &min_sched_granularity_ns,
-               .extra2         = &max_sched_granularity_ns,
-       },
-       {
-               .ctl_name       = CTL_UNNUMBERED,
                .procname       = "sched_child_runs_first",
                .data           = &sysctl_sched_child_runs_first,
                .maxlen         = sizeof(unsigned int),