scheduler: Re-compute time-average nr_running on read
Varun Wadekar [Wed, 11 Jul 2012 08:58:06 +0000 (13:58 +0530)]
Port commit 1802afb2a (http://git-master/r/111637) from v3.1

Re-compute the time-average nr_running when it is read. This prevents
returning a stale average when there have been no run-queue changes
for a long time. The freshly computed value is returned to the reader
but not stored back, to avoid concurrent writes. A light-weight
sequence counter (seqcount) is used to keep the re-computed average
consistent with concurrent run-queue updates.
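
For reference, the read/retry logic below follows the usual Linux
seqcount pattern. The following is a minimal user-space model of that
pattern (hypothetical names; C11 atomics standing in for the kernel's
<linux/seqlock.h>, and without the memory barriers a real concurrent
version needs), not the kernel implementation itself:

/* seqcount_sketch.c - illustrative stand-in for <linux/seqlock.h>.
 * Build: cc -std=c11 seqcount_sketch.c
 */
#include <stdatomic.h>
#include <stdio.h>

struct sample_rq {
        atomic_uint seq;             /* even: no writer; odd: write in progress */
        unsigned int nr_running;
        unsigned int ave_nr_running; /* last average stored by a writer */
};

/* Stand-in for do_avg_nr_running(): recompute without storing. */
static unsigned int recompute_average(const struct sample_rq *rq)
{
        return (rq->ave_nr_running + rq->nr_running) / 2;
}

/* Writer: bump the counter around the update, as inc_nr_running()
 * and dec_nr_running() do with write_seqcount_begin()/end(). */
static void writer_update(struct sample_rq *rq)
{
        atomic_fetch_add(&rq->seq, 1);               /* -> odd  */
        rq->ave_nr_running = recompute_average(rq);
        rq->nr_running++;
        atomic_fetch_add(&rq->seq, 1);               /* -> even */
}

/* Reader: recompute speculatively; if a writer ran meanwhile, fall
 * back to the stored value instead of looping, like the fallback
 * in avg_nr_running() below. */
static unsigned int reader_get(struct sample_rq *rq)
{
        unsigned int start = atomic_load(&rq->seq);
        unsigned int ave = recompute_average(rq);

        if ((start & 1) || atomic_load(&rq->seq) != start)
                ave = rq->ave_nr_running;    /* concurrent write detected */
        return ave;
}

int main(void)
{
        struct sample_rq rq = { .nr_running = 4, .ave_nr_running = 2 };

        printf("average: %u\n", reader_get(&rq));    /* (2+4)/2 = 3 */
        writer_update(&rq);
        printf("average: %u\n", reader_get(&rq));    /* (3+5)/2 = 4 */
        return 0;
}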

Original author: Alex Frid <afrid@nvidia.com>

Signed-off-by: Alex Frid <afrid@nvidia.com>
Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>

Change-Id: Ic486006d62436fb61cda4ab6897e933f5c102b52

kernel/sched/core.c
kernel/sched/sched.h

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6de8478..8ef3970 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2161,6 +2161,32 @@ unsigned long this_cpu_load(void)
        return this->cpu_load[0];
 }
 
+unsigned long avg_nr_running(void)
+{
+       unsigned long i, sum = 0;
+       unsigned int seqcnt, ave_nr_running;
+
+       for_each_online_cpu(i) {
+               struct rq *q = cpu_rq(i);
+
+               /*
+                * Update average to avoid reading a stale value if there were
+                * no run-queue changes for a long time. On the other hand if
+                * the changes are happening right now, just read current value
+                * directly.
+                */
+               seqcnt = read_seqcount_begin(&q->ave_seqcnt);
+               ave_nr_running = do_avg_nr_running(q);
+               if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
+                       read_seqcount_begin(&q->ave_seqcnt);
+                       ave_nr_running = q->ave_nr_running;
+               }
+
+               sum += ave_nr_running;
+       }
+
+       return sum;
+}
 
 /* Variables and functions for calc_load */
 static atomic_long_t calc_load_tasks;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index cc66a79..2901fc7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -366,6 +366,7 @@ struct rq {
        /* time-based average load */
        u64 nr_last_stamp;
        unsigned int ave_nr_running;
+       seqcount_t ave_seqcnt;
 
        /* capture load from *all* tasks on this cpu: */
        struct load_weight load;
@@ -928,41 +929,39 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
 #define NR_AVE_PERIOD          (1 << NR_AVE_PERIOD_EXP)
 #define NR_AVE_DIV_PERIOD(x)   ((x) >> NR_AVE_PERIOD_EXP)
 
-static inline void do_avg_nr_running(struct rq *rq)
+static inline unsigned int do_avg_nr_running(struct rq *rq)
 {
        s64 nr, deltax;
+       unsigned int ave_nr_running = rq->ave_nr_running;
 
        deltax = rq->clock_task - rq->nr_last_stamp;
-       rq->nr_last_stamp = rq->clock_task;
        nr = NR_AVE_SCALE(rq->nr_running);
 
        if (deltax > NR_AVE_PERIOD)
-               rq->ave_nr_running = nr;
+               ave_nr_running = nr;
        else
-               rq->ave_nr_running +=
-                       NR_AVE_DIV_PERIOD(deltax * (nr - rq->ave_nr_running));
+               ave_nr_running +=
+                       NR_AVE_DIV_PERIOD(deltax * (nr - ave_nr_running));
+
+       return ave_nr_running;
 }
 
 static inline void inc_nr_running(struct rq *rq)
 {
-       do_avg_nr_running(rq);
+       write_seqcount_begin(&rq->ave_seqcnt);
+       rq->ave_nr_running = do_avg_nr_running(rq);
+       rq->nr_last_stamp = rq->clock_task;
        rq->nr_running++;
+       write_seqcount_end(&rq->ave_seqcnt);
 }
 
 static inline void dec_nr_running(struct rq *rq)
 {
-       do_avg_nr_running(rq);
+       write_seqcount_begin(&rq->ave_seqcnt);
+       rq->ave_nr_running = do_avg_nr_running(rq);
+       rq->nr_last_stamp = rq->clock_task;
        rq->nr_running--;
-}
-
-unsigned long avg_nr_running(void)
-{
-       unsigned long i, sum = 0;
-
-       for_each_online_cpu(i)
-               sum += cpu_rq(i)->ave_nr_running;
-
-       return sum;
+       write_seqcount_end(&rq->ave_seqcnt);
 }
 
 extern void update_rq_clock(struct rq *rq);
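
The update rule in do_avg_nr_running() is a fixed-point exponential
moving average: the stored value decays toward the instantaneous
nr_running in proportion to how much task-clock time has elapsed, and
snaps straight to the current value once a full NR_AVE_PERIOD has
passed with no updates. Below is a stand-alone sketch with one worked
number; NR_AVE_PERIOD_EXP == 27 (about 134 ms of clock_task
nanoseconds) and NR_AVE_SCALE() shifting by FSHIFT == 11 are
assumptions for illustration, since those definitions sit above the
hunk and are not shown here:

/* avg_sketch.c - stand-alone model of the do_avg_nr_running() update.
 * NR_AVE_PERIOD_EXP and the FSHIFT scaling are assumed values.
 * Build: cc -std=c11 avg_sketch.c
 */
#include <stdint.h>
#include <stdio.h>

#define FSHIFT                  11
#define NR_AVE_SCALE(x)         ((x) << FSHIFT)
#define NR_AVE_PERIOD_EXP       27   /* period of ~134 ms in ns */
#define NR_AVE_PERIOD           (1 << NR_AVE_PERIOD_EXP)
#define NR_AVE_DIV_PERIOD(x)    ((x) >> NR_AVE_PERIOD_EXP)

/* Same rule as do_avg_nr_running(): decay the fixed-point average
 * toward the instantaneous nr_running by deltax/NR_AVE_PERIOD. */
static unsigned int avg_update(unsigned int ave, unsigned int nr_running,
                               int64_t deltax)
{
        int64_t nr = NR_AVE_SCALE((int64_t)nr_running);

        if (deltax > NR_AVE_PERIOD)
                return (unsigned int)nr;  /* idle a full period: snap */
        return (unsigned int)(ave + NR_AVE_DIV_PERIOD(deltax * (nr - ave)));
}

int main(void)
{
        unsigned int ave = NR_AVE_SCALE(1u);  /* average of 1.0 tasks */

        /* 2 tasks runnable for a quarter period (~33 ms): the average
         * moves a quarter of the way from 1.0 toward 2.0, i.e. 1.25. */
        ave = avg_update(ave, 2, NR_AVE_PERIOD / 4);
        printf("average = %.3f tasks\n", (double)ave / (1 << FSHIFT));
        return 0;
}

Note that avg_nr_running() returns the sum of the per-CPU averages
still in this fixed-point form, so under the same assumption a
consumer would shift the result right by FSHIFT to obtain whole
tasks.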