[PATCH] per-task-delay-accounting: sync block I/O and swapin delay collection
Shailabh Nagar [Fri, 14 Jul 2006 07:24:37 +0000 (00:24 -0700)]
Unlike earlier iterations of the delay accounting patches, now delays are only
collected for the actual I/O waits rather than try and cover the delays seen
in I/O submission paths.

Account separately for block I/O delays incurred as a result of swapin page
faults whose frequency can be affected by the task/process' rss limit.  Hence
swapin delays can act as feedback for rss limit changes independent of I/O
priority changes.

Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
Cc: Jes Sorensen <jes@sgi.com>
Cc: Peter Chubb <peterc@gelato.unsw.edu.au>
Cc: Erich Focht <efocht@ess.nec.de>
Cc: Levent Serinol <lserinol@gmail.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

include/linux/delayacct.h
include/linux/sched.h
kernel/delayacct.c
kernel/sched.c
mm/memory.c

index 9572cfa..0ecbf9a 100644 (file)
 
 #include <linux/sched.h>
 
+/*
+ * Per-task flags relevant to delay accounting
+ * maintained privately to avoid exhausting similar flags in sched.h:PF_*
+ * Used to set current->delays->flags
+ */
+#define DELAYACCT_PF_SWAPIN    0x00000001      /* I am doing a swapin */
+
 #ifdef CONFIG_TASK_DELAY_ACCT
 
 extern int delayacct_on;       /* Delay accounting turned on/off */
@@ -26,6 +33,8 @@ extern kmem_cache_t *delayacct_cache;
 extern void delayacct_init(void);
 extern void __delayacct_tsk_init(struct task_struct *);
 extern void __delayacct_tsk_exit(struct task_struct *);
+extern void __delayacct_blkio_start(void);
+extern void __delayacct_blkio_end(void);
 
 static inline void delayacct_set_flag(int flag)
 {
@@ -53,6 +62,18 @@ static inline void delayacct_tsk_exit(struct task_struct *tsk)
                __delayacct_tsk_exit(tsk);
 }
 
+static inline void delayacct_blkio_start(void)
+{
+       if (current->delays)
+               __delayacct_blkio_start();
+}
+
+static inline void delayacct_blkio_end(void)
+{
+       if (current->delays)
+               __delayacct_blkio_end();
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -64,6 +85,10 @@ static inline void delayacct_tsk_init(struct task_struct *tsk)
 {}
 static inline void delayacct_tsk_exit(struct task_struct *tsk)
 {}
+static inline void delayacct_blkio_start(void)
+{}
+static inline void delayacct_blkio_end(void)
+{}
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
index 7a54e62..2f43f1f 100644 (file)
@@ -566,6 +566,19 @@ struct task_delay_info {
         * Atomicity of updates to XXX_delay, XXX_count protected by
         * single lock above (split into XXX_lock if contention is an issue).
         */
+
+       /*
+        * XXX_count is incremented on every XXX operation, the delay
+        * associated with the operation is added to XXX_delay.
+        * XXX_delay contains the accumulated delay time in nanoseconds.
+        */
+       struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */
+       u64 blkio_delay;        /* wait for sync block io completion */
+       u64 swapin_delay;       /* wait for swapin block io completion */
+       u32 blkio_count;        /* total count of the number of sync block */
+                               /* io operations performed */
+       u32 swapin_count;       /* total count of the number of swapin block */
+                               /* io operations performed */
 };
 #endif
 
index fbf7f22..3546b08 100644 (file)
@@ -85,3 +85,22 @@ static void delayacct_end(struct timespec *start, struct timespec *end,
        spin_unlock(&current->delays->lock);
 }
 
+void __delayacct_blkio_start(void)
+{
+       delayacct_start(&current->delays->blkio_start);
+}
+
+void __delayacct_blkio_end(void)
+{
+       if (current->delays->flags & DELAYACCT_PF_SWAPIN)
+               /* Swapin block I/O */
+               delayacct_end(&current->delays->blkio_start,
+                       &current->delays->blkio_end,
+                       &current->delays->swapin_delay,
+                       &current->delays->swapin_count);
+       else    /* Other block I/O */
+               delayacct_end(&current->delays->blkio_start,
+                       &current->delays->blkio_end,
+                       &current->delays->blkio_delay,
+                       &current->delays->blkio_count);
+}
index e9a0b61..9d42cbf 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/times.h>
 #include <linux/acct.h>
 #include <linux/kprobes.h>
+#include <linux/delayacct.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -4534,9 +4535,11 @@ void __sched io_schedule(void)
 {
        struct rq *rq = &__raw_get_cpu_var(runqueues);
 
+       delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
        schedule();
        atomic_dec(&rq->nr_iowait);
+       delayacct_blkio_end();
 }
 EXPORT_SYMBOL(io_schedule);
 
@@ -4545,9 +4548,11 @@ long __sched io_schedule_timeout(long timeout)
        struct rq *rq = &__raw_get_cpu_var(runqueues);
        long ret;
 
+       delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
        ret = schedule_timeout(timeout);
        atomic_dec(&rq->nr_iowait);
+       delayacct_blkio_end();
        return ret;
 }
 
index de8bc85..109e986 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
 #include <linux/module.h>
+#include <linux/delayacct.h>
 #include <linux/init.h>
 
 #include <asm/pgalloc.h>
@@ -1934,6 +1935,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                migration_entry_wait(mm, pmd, address);
                goto out;
        }
+       delayacct_set_flag(DELAYACCT_PF_SWAPIN);
        page = lookup_swap_cache(entry);
        if (!page) {
                swapin_readahead(entry, address, vma);
@@ -1946,6 +1948,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
                        if (likely(pte_same(*page_table, orig_pte)))
                                ret = VM_FAULT_OOM;
+                       delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
                        goto unlock;
                }
 
@@ -1955,6 +1958,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                grab_swap_token();
        }
 
+       delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
        mark_page_accessed(page);
        lock_page(page);