sched: Provide iowait counters
Arjan van de Ven [Mon, 20 Jul 2009 18:26:58 +0000 (11:26 -0700)]
For counting how long an application has been waiting for
(disk) IO, only the HZ-sample-driven information is currently
available, while for all other counters in this class a
high-resolution version is available via CONFIG_SCHEDSTATS.

In order to make an improved bootchart tool possible, we also
need a higher resolution version of the iowait time.

This patch adds this scheduler statistic to the kernel.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4A64B813.1080506@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

include/linux/sched.h
kernel/sched.c
kernel/sched_debug.c
kernel/sched_fair.c

index e209ae0..9c96ef2 100644 (file)
@@ -1111,6 +1111,8 @@ struct sched_entity {
        u64                     wait_max;
        u64                     wait_count;
        u64                     wait_sum;
+       u64                     iowait_count;
+       u64                     iowait_sum;
 
        u64                     sleep_start;
        u64                     sleep_max;
@@ -1231,6 +1233,8 @@ struct task_struct {
        unsigned did_exec:1;
        unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                                 * execve */
+       unsigned in_iowait:1;
+
 
        /* Revert to default priority/policy when forking */
        unsigned sched_reset_on_fork:1;
index 6244d24..38d05a8 100644 (file)
@@ -6754,7 +6754,9 @@ void __sched io_schedule(void)
 
        delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
+       current->in_iowait = 1;
        schedule();
+       current->in_iowait = 0;
        atomic_dec(&rq->nr_iowait);
        delayacct_blkio_end();
 }
@@ -6767,7 +6769,9 @@ long __sched io_schedule_timeout(long timeout)
 
        delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
+       current->in_iowait = 1;
        ret = schedule_timeout(timeout);
+       current->in_iowait = 0;
        atomic_dec(&rq->nr_iowait);
        delayacct_blkio_end();
        return ret;
index 70c7e0b..5ddbd08 100644 (file)
@@ -409,6 +409,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
        PN(se.wait_max);
        PN(se.wait_sum);
        P(se.wait_count);
+       PN(se.iowait_sum);
+       P(se.iowait_count);
        P(sched_info.bkl_count);
        P(se.nr_migrations);
        P(se.nr_migrations_cold);
@@ -479,6 +481,8 @@ void proc_sched_set_task(struct task_struct *p)
        p->se.wait_max                          = 0;
        p->se.wait_sum                          = 0;
        p->se.wait_count                        = 0;
+       p->se.iowait_sum                        = 0;
+       p->se.iowait_count                      = 0;
        p->se.sleep_max                         = 0;
        p->se.sum_sleep_runtime                 = 0;
        p->se.block_max                         = 0;
index 342000b..471fa28 100644 (file)
@@ -652,6 +652,11 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
                se->sum_sleep_runtime += delta;
 
                if (tsk) {
+                       if (tsk->in_iowait) {
+                               se->iowait_sum += delta;
+                               se->iowait_count++;
+                       }
+
                        /*
                         * Blocking time is in units of nanosecs, so shift by
                         * 20 to get a milliseconds-range estimation of the