Revert "cpuquiet: Update averaging of nr_runnables"
Puneet Saxena [Thu, 20 Sep 2012 13:52:09 +0000 (18:52 +0530)]
This reverts commit 8bd999a85354485af3cbee872816a9921d8bfffc.

bug 1050721

Change-Id: I29fcff431e5427dfaa2524a12c5702154037018a
Signed-off-by: Puneet Saxena <puneets@nvidia.com>
Reviewed-on: http://git-master/r/134307
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>

drivers/cpuquiet/governors/balanced.c
drivers/cpuquiet/governors/runnable_threads.c
fs/proc/loadavg.c
include/linux/sched.h
kernel/sched/core.c
kernel/sched/debug.c
kernel/sched/sched.h

diff --git a/drivers/cpuquiet/governors/balanced.c b/drivers/cpuquiet/governors/balanced.c
index f187206..1ffbc62 100644
@@ -204,54 +204,6 @@ static unsigned int *rt_profiles[] = {
 static unsigned int nr_run_hysteresis = 2;     /* 0.5 thread */
 static unsigned int nr_run_last;
 
-struct runnables_avg_sample {
-       u64 previous_integral;
-       unsigned int avg;
-       bool integral_sampled;
-       u64 prev_timestamp;
-};
-
-static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);
-
-static unsigned int get_avg_nr_runnables(void)
-{
-       unsigned int i, sum = 0;
-       struct runnables_avg_sample *sample;
-       u64 integral, old_integral, delta_integral, delta_time, cur_time;
-
-       for_each_online_cpu(i) {
-               sample = &per_cpu(avg_nr_sample, i);
-               integral = nr_running_integral(i);
-               old_integral = sample->previous_integral;
-               sample->previous_integral = integral;
-               cur_time = ktime_to_ns(ktime_get());
-               delta_time = cur_time - sample->prev_timestamp;
-               sample->prev_timestamp = cur_time;
-
-               if (!sample->integral_sampled) {
-                       sample->integral_sampled = true;
-                       /* First sample to initialize prev_integral, skip
-                        * avg calculation
-                        */
-                       continue;
-               }
-
-               if (integral < old_integral) {
-                       /* Overflow */
-                       delta_integral = (ULLONG_MAX - old_integral) + integral;
-               } else {
-                       delta_integral = integral - old_integral;
-               }
-
-               /* Calculate average for the previous sample window */
-               do_div(delta_integral, delta_time);
-               sample->avg = delta_integral;
-               sum += sample->avg;
-       }
-
-       return sum;
-}
-
 static CPU_SPEED_BALANCE balanced_speed_balance(void)
 {
        unsigned long highest_speed = cpu_highest_speed();
@@ -259,7 +211,7 @@ static CPU_SPEED_BALANCE balanced_speed_balance(void)
        unsigned long skewed_speed = balanced_speed / 2;
        unsigned int nr_cpus = num_online_cpus();
        unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
-       unsigned int avg_nr_run = get_avg_nr_runnables();
+       unsigned int avg_nr_run = avg_nr_running();
        unsigned int nr_run;
        unsigned int *current_profile = rt_profiles[rt_profile_sel];
 
diff --git a/drivers/cpuquiet/governors/runnable_threads.c b/drivers/cpuquiet/governors/runnable_threads.c
index fcc5031..c6b3bd5 100644
@@ -33,15 +33,14 @@ typedef enum {
        UP,
 } RUNNABLES_STATE;
 
-static struct work_struct runnables_work;
+static struct delayed_work runnables_work;
 static struct kobject *runnables_kobject;
-static struct timer_list runnables_timer;
 
-static RUNNABLES_STATE runnables_state;
 /* configurable parameters */
-static unsigned int sample_rate = 10;          /* msec */
+static unsigned int sample_rate = 20;          /* msec */
 
 static RUNNABLES_STATE runnables_state;
+static struct workqueue_struct *runnables_wq;
 
 #define NR_FSHIFT_EXP  3
 #define NR_FSHIFT      (1 << NR_FSHIFT_EXP)
@@ -57,93 +56,17 @@ static unsigned int nr_run_thresholds[NR_CPUS];
 
 DEFINE_MUTEX(runnables_work_lock);
 
-struct runnables_avg_sample {
-       u64 previous_integral;
-       unsigned int avg;
-       bool integral_sampled;
-       u64 prev_timestamp;
-};
-
-static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);
-
-/* EXP = alpha in the exponential moving average.
- * Alpha = e ^ (-sample_rate / window_size) * FIXED_1
- * Calculated for sample_rate of 10ms, window size of 63.82ms
- */
-#define EXP    1751
-
-static unsigned int get_avg_nr_runnables(void)
-{
-       unsigned int i, sum = 0;
-       static unsigned int avg;
-       struct runnables_avg_sample *sample;
-       u64 integral, old_integral, delta_integral, delta_time, cur_time;
-
-       for_each_online_cpu(i) {
-               sample = &per_cpu(avg_nr_sample, i);
-               integral = nr_running_integral(i);
-               old_integral = sample->previous_integral;
-               sample->previous_integral = integral;
-               cur_time = ktime_to_ns(ktime_get());
-               delta_time = cur_time - sample->prev_timestamp;
-               sample->prev_timestamp = cur_time;
-
-               if (!sample->integral_sampled) {
-                       sample->integral_sampled = true;
-                       /* First sample to initialize prev_integral, skip
-                        * avg calculation
-                        */
-                       continue;
-               }
-
-               if (integral < old_integral) {
-                       /* Overflow */
-                       delta_integral = (ULLONG_MAX - old_integral) + integral;
-               } else {
-                       delta_integral = integral - old_integral;
-               }
-
-               /* Calculate average for the previous sample window */
-               do_div(delta_integral, delta_time);
-               sample->avg = delta_integral;
-               sum += sample->avg;
-       }
-
-       /* Exponential moving average
-        * Avgn = Avgn-1 * alpha + new_avg * (1 - alpha)
-        */
-       avg *= EXP;
-       avg += sum * (FIXED_1 - EXP);
-       avg >>= FSHIFT;
-
-       return avg;
-}
-
-static void update_runnables_state(unsigned int nr_run)
+static void update_runnables_state(void)
 {
        unsigned int nr_cpus = num_online_cpus();
        int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
        int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS);
-
-       if ((nr_cpus > max_cpus || nr_run < nr_cpus) && nr_cpus >= min_cpus) {
-               runnables_state = DOWN;
-       } else if (nr_cpus < min_cpus || nr_run > nr_cpus) {
-               runnables_state =  UP;
-       } else {
-               runnables_state = IDLE;
-       }
-}
-
-static void runnables_avg_sampler(unsigned long data)
-{
-       unsigned int nr_run, avg_nr_run;
+       unsigned int avg_nr_run = avg_nr_running();
+       unsigned int nr_run;
 
        if (runnables_state == DISABLED)
                return;
 
-       avg_nr_run = get_avg_nr_runnables();
-       mod_timer(&runnables_timer, jiffies + msecs_to_jiffies(sample_rate));
-
        for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) {
                unsigned int nr_threshold = nr_run_thresholds[nr_run - 1];
                if (nr_run_last <= nr_run)
@@ -151,12 +74,15 @@ static void runnables_avg_sampler(unsigned long data)
                if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT_EXP)))
                        break;
        }
-
        nr_run_last = nr_run;
-       update_runnables_state(nr_run);
 
-       if (runnables_state != DISABLED && runnables_state != IDLE)
-               schedule_work(&runnables_work);
+       if ((nr_cpus > max_cpus || nr_run < nr_cpus) && nr_cpus >= min_cpus) {
+               runnables_state = DOWN;
+       } else if (nr_cpus < min_cpus || nr_run > nr_cpus) {
+               runnables_state =  UP;
+       } else {
+               runnables_state = IDLE;
+       }
 }
 
 static unsigned int get_lightest_loaded_cpu_n(void)
@@ -166,8 +92,8 @@ static unsigned int get_lightest_loaded_cpu_n(void)
        int i;
 
        for_each_online_cpu(i) {
-               struct runnables_avg_sample *s = &per_cpu(avg_nr_sample, i);
-               unsigned int nr_runnables = s->avg;
+               unsigned int nr_runnables = get_avg_nr_running(i);
+
                if (i > 0 && min_avg_runnables > nr_runnables) {
                        cpu = i;
                        min_avg_runnables = nr_runnables;
@@ -180,23 +106,27 @@ static unsigned int get_lightest_loaded_cpu_n(void)
 static void runnables_work_func(struct work_struct *work)
 {
        bool up = false;
+       bool sample = false;
        unsigned int cpu = nr_cpu_ids;
 
        mutex_lock(&runnables_work_lock);
 
-       /* Update state to avoid duplicate operations */
-       update_runnables_state(nr_run_last);
+       update_runnables_state();
 
        switch (runnables_state) {
        case DISABLED:
+               break;
        case IDLE:
+               sample = true;
                break;
        case UP:
                cpu = cpumask_next_zero(0, cpu_online_mask);
                up = true;
+               sample = true;
                break;
        case DOWN:
                cpu = get_lightest_loaded_cpu_n();
+               sample = true;
                break;
        default:
                pr_err("%s: invalid cpuquiet runnable governor state %d\n",
@@ -204,6 +134,10 @@ static void runnables_work_func(struct work_struct *work)
                break;
        }
 
+       if (sample)
+               queue_delayed_work(runnables_wq, &runnables_work,
+                                       msecs_to_jiffies(sample_rate));
+
        if (cpu < nr_cpu_ids) {
                if (up)
                        cpuquiet_wake_cpu(cpu);
@@ -256,7 +190,7 @@ static void runnables_device_busy(void)
 {
        if (runnables_state != DISABLED) {
                runnables_state = DISABLED;
-               cancel_work_sync(&runnables_work);
+               cancel_delayed_work_sync(&runnables_work);
        }
 }
 
@@ -264,14 +198,15 @@ static void runnables_device_free(void)
 {
        if (runnables_state == DISABLED) {
                runnables_state = IDLE;
-               mod_timer(&runnables_timer, jiffies + 1);
+               runnables_work_func(NULL);
        }
 }
 
 static void runnables_stop(void)
 {
        runnables_state = DISABLED;
-       cancel_work_sync(&runnables_work);
+       cancel_delayed_work_sync(&runnables_work);
+       destroy_workqueue(runnables_wq);
        kobject_put(runnables_kobject);
 }
 
@@ -283,10 +218,12 @@ static int runnables_start(void)
        if (err)
                return err;
 
-       INIT_WORK(&runnables_work, runnables_work_func);
+       runnables_wq = alloc_workqueue("cpuquiet-runnables",
+                       WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1);
+       if (!runnables_wq)
+               return -ENOMEM;
 
-       init_timer(&runnables_timer);
-       runnables_timer.function = runnables_avg_sampler;
+       INIT_DELAYED_WORK(&runnables_work, runnables_work_func);
 
        for(i = 0; i < ARRAY_SIZE(nr_run_thresholds); ++i) {
                if (i < ARRAY_SIZE(default_thresholds))
@@ -299,7 +236,7 @@ static int runnables_start(void)
        }
 
        runnables_state = IDLE;
-       runnables_avg_sampler(0);
+       runnables_work_func(NULL);
 
        return 0;
 }
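
The revert also drops the governor-local exponential moving average (the
removed EXP == 1751 is e^(-10 ms / 63.82 ms) * FIXED_1 ~= 0.855 * 2048) and
the dedicated sampling timer; sampling now rides the self-requeued delayed
work at the restored 20 ms rate. The threshold comparison mixes two
fixed-point scales: nr_run_thresholds[] entries are in NR_FSHIFT units
(1 << 3 == 8), so they are shifted left by FSHIFT - NR_FSHIFT_EXP == 8 bits
to meet the FIXED_1-scaled average. A worked sketch, with the threshold and
average values invented for illustration:

#include <stdio.h>

#define FSHIFT        11
#define NR_FSHIFT_EXP 3

int main(void)
{
        unsigned int nr_threshold = 10;   /* 10/8 == 1.25 threads      */
        unsigned int avg_nr_run   = 2400; /* 2400/2048 ~= 1.17 threads */
        unsigned int scaled = nr_threshold << (FSHIFT - NR_FSHIFT_EXP);

        printf("scaled threshold: %u\n", scaled);    /* 2560 */
        printf("under threshold:  %s\n",
               avg_nr_run <= scaled ? "yes" : "no"); /* yes  */
        return 0;
}
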
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 1afa4dd..8d95888 100644
 static int loadavg_proc_show(struct seq_file *m, void *v)
 {
        unsigned long avnrun[3];
+       unsigned long time_avnrun = avg_nr_running();
 
        get_avenrun(avnrun, FIXED_1/200, 0);
 
-       seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
+       seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d %lu.%02lu\n",
                LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
                LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
                LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
                nr_running(), nr_threads,
-               task_active_pid_ns(current)->last_pid);
+               task_active_pid_ns(current)->last_pid,
+               LOAD_INT(time_avnrun), LOAD_FRAC(time_avnrun));
        return 0;
 }
 
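The extra /proc/loadavg field reuses the existing LOAD_INT()/LOAD_FRAC()
macros (x >> FSHIFT and ((x & (FIXED_1 - 1)) * 100) >> FSHIFT). For an
assumed time_avnrun of 4608 the line gains "2.25" at the end:
4608 >> 11 == 2 and ((4608 & 2047) * 100) >> 11 == 25.
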
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d18f24..76137cb 100644
@@ -139,7 +139,8 @@ extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_iowait(void);
-extern u64 nr_running_integral(unsigned int cpu);
+extern unsigned long get_avg_nr_running(unsigned int cpu);
+extern unsigned long avg_nr_running(void);
 extern unsigned long nr_iowait_cpu(int cpu);
 extern unsigned long this_cpu_load(void);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 974c9b2..b8f4618 100644
@@ -2161,10 +2161,35 @@ unsigned long this_cpu_load(void)
        return this->cpu_load[0];
 }
 
-u64 nr_running_integral(unsigned int cpu)
+unsigned long avg_nr_running(void)
+{
+       unsigned long i, sum = 0;
+       unsigned int seqcnt, ave_nr_running;
+
+       for_each_online_cpu(i) {
+               struct rq *q = cpu_rq(i);
+
+               /*
+                * Update average to avoid reading stalled value if there were
+                * no run-queue changes for a long time. On the other hand if
+                * the changes are happening right now, just read current value
+                * directly.
+                */
+               seqcnt = read_seqcount_begin(&q->ave_seqcnt);
+               ave_nr_running = do_avg_nr_running(q);
+               if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
+                       read_seqcount_begin(&q->ave_seqcnt);
+                       ave_nr_running = q->ave_nr_running;
+               }
+
+               sum += ave_nr_running;
+       }
+
+       return sum;
+}
+
+unsigned long get_avg_nr_running(unsigned int cpu)
 {
-       unsigned int seqcnt;
-       u64 integral;
        struct rq *q;
 
        if (cpu >= nr_cpu_ids)
@@ -2172,21 +2197,7 @@ u64 nr_running_integral(unsigned int cpu)
 
        q = cpu_rq(cpu);
 
-       /*
-        * Update average to avoid reading stalled value if there were
-        * no run-queue changes for a long time. On the other hand if
-        * the changes are happening right now, just read current value
-        * directly.
-        */
-
-       seqcnt = read_seqcount_begin(&q->ave_seqcnt);
-       integral = do_nr_running_integral(q);
-       if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
-               read_seqcount_begin(&q->ave_seqcnt);
-               integral = q->nr_running_integral;
-       }
-
-       return integral;
+       return q->ave_nr_running;
 }
 
 /*
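
avg_nr_running() deliberately avoids the canonical seqcount retry loop: if
read_seqcount_retry() reports a racing writer, the stored q->ave_nr_running
was refreshed by that very writer, so falling back to it is both fresh and
bounded in time (the second read_seqcount_begin() only waits out the
in-flight writer; its return value is discarded). For contrast, a sketch of
the canonical reader, which can spin for as long as writers keep arriving:

unsigned int seq, ave;

do {
        seq = read_seqcount_begin(&q->ave_seqcnt);
        ave = do_avg_nr_running(q);
} while (read_seqcount_retry(&q->ave_seqcnt, seq));
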
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 09acaa1..06d172e 100644
@@ -266,6 +266,9 @@ static void print_cpu(struct seq_file *m, int cpu)
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
 
        P(nr_running);
+       SEQ_printf(m, "  .%-30s: %d.%03d   \n", "ave_nr_running",
+                  rq->ave_nr_running / FIXED_1,
+                  ((rq->ave_nr_running % FIXED_1) * 1000) / FIXED_1);
        SEQ_printf(m, "  .%-30s: %lu\n", "load",
                   rq->load.weight);
        P(nr_switches);
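
With FIXED_1 == 2048, an ave_nr_running of 3072 prints here as "1.500":
3072 / 2048 == 1 and ((3072 % 2048) * 1000) / 2048 == 500.
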
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9958941..ef5a1ff 100644
@@ -365,7 +365,7 @@ struct rq {
 
        /* time-based average load */
        u64 nr_last_stamp;
-       u64 nr_running_integral;
+       unsigned int ave_nr_running;
        seqcount_t ave_seqcnt;
 
        /* capture load from *all* tasks on this cpu: */
@@ -924,26 +924,32 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
  * 25 ~=  33554432ns =  33.5ms
  * 24 ~=  16777216ns =  16.8ms
  */
+#define NR_AVE_PERIOD_EXP      27
 #define NR_AVE_SCALE(x)                ((x) << FSHIFT)
+#define NR_AVE_PERIOD          (1 << NR_AVE_PERIOD_EXP)
+#define NR_AVE_DIV_PERIOD(x)   ((x) >> NR_AVE_PERIOD_EXP)
 
-
-static inline u64 do_nr_running_integral(struct rq *rq)
+static inline unsigned int do_avg_nr_running(struct rq *rq)
 {
        s64 nr, deltax;
-       u64 nr_running_integral = rq->nr_running_integral;
+       unsigned int ave_nr_running = rq->ave_nr_running;
 
        deltax = rq->clock_task - rq->nr_last_stamp;
        nr = NR_AVE_SCALE(rq->nr_running);
 
-       nr_running_integral += nr * deltax;
+       if (deltax > NR_AVE_PERIOD)
+               ave_nr_running = nr;
+       else
+               ave_nr_running +=
+                       NR_AVE_DIV_PERIOD(deltax * (nr - ave_nr_running));
 
-       return nr_running_integral;
+       return ave_nr_running;
 }
 
 static inline void inc_nr_running(struct rq *rq)
 {
        write_seqcount_begin(&rq->ave_seqcnt);
-       rq->nr_running_integral = do_nr_running_integral(rq);
+       rq->ave_nr_running = do_avg_nr_running(rq);
        rq->nr_last_stamp = rq->clock_task;
        rq->nr_running++;
        write_seqcount_end(&rq->ave_seqcnt);
@@ -952,7 +958,7 @@ static inline void inc_nr_running(struct rq *rq)
 static inline void dec_nr_running(struct rq *rq)
 {
        write_seqcount_begin(&rq->ave_seqcnt);
-       rq->nr_running_integral = do_nr_running_integral(rq);
+       rq->ave_nr_running = do_avg_nr_running(rq);
        rq->nr_last_stamp = rq->clock_task;
        rq->nr_running--;
        write_seqcount_end(&rq->ave_seqcnt);
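
The restored estimator is a time-weighted moving average with a 2^27 ns
(~134 ms) period: each nr_running change folds the previous count in,
weighted by how long it was current, and a gap longer than one full period
snaps the average straight to the instantaneous count. A self-contained
user-space sketch with one worked step; the constants mirror the patch,
the inputs are invented:

#include <stdio.h>
#include <stdint.h>

#define FSHIFT               11
#define NR_AVE_PERIOD_EXP    27
#define NR_AVE_SCALE(x)      ((int64_t)(x) << FSHIFT)
#define NR_AVE_PERIOD        (1LL << NR_AVE_PERIOD_EXP)
#define NR_AVE_DIV_PERIOD(x) ((x) >> NR_AVE_PERIOD_EXP)

static unsigned int do_avg(unsigned int ave, unsigned int nr_running,
                           int64_t deltax /* ns since last update */)
{
        int64_t nr = NR_AVE_SCALE(nr_running);

        if (deltax > NR_AVE_PERIOD)
                return (unsigned int)nr; /* idle a full period: snap */
        return ave + NR_AVE_DIV_PERIOD(deltax * (nr - (int64_t)ave));
}

int main(void)
{
        /* average at 1.0 thread, then nr_running == 2 for half a
         * period: ave += (2^26 * (4096 - 2048)) >> 27 == 1024,
         * giving 3072, i.e. 1.50 threads.
         */
        unsigned int ave = 1 << FSHIFT;

        ave = do_avg(ave, 2, NR_AVE_PERIOD / 2);
        printf("%u.%02u\n", ave >> FSHIFT,
               ((ave & ((1 << FSHIFT) - 1)) * 100) >> FSHIFT);
        return 0;
}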