sched: debug, improve migration statistics
Ingo Molnar [Mon, 15 Oct 2007 15:00:18 +0000 (17:00 +0200)]
add new migration statistics when SCHED_DEBUG and SCHEDSTATS
is enabled. Available in /proc/<PID>/sched.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

include/linux/sched.h
kernel/sched.c
kernel/sched_debug.c

index fcc9a5a..3a6e05e 100644 (file)
@@ -931,6 +931,24 @@ struct sched_entity {
        u64                     block_max;
        u64                     exec_max;
        u64                     slice_max;
+
+       u64                     nr_migrations;
+       u64                     nr_migrations_cold;
+       u64                     nr_failed_migrations_affine;
+       u64                     nr_failed_migrations_running;
+       u64                     nr_failed_migrations_hot;
+       u64                     nr_forced_migrations;
+       u64                     nr_forced2_migrations;
+
+       u64                     nr_wakeups;
+       u64                     nr_wakeups_sync;
+       u64                     nr_wakeups_migrate;
+       u64                     nr_wakeups_local;
+       u64                     nr_wakeups_remote;
+       u64                     nr_wakeups_affine;
+       u64                     nr_wakeups_affine_attempts;
+       u64                     nr_wakeups_passive;
+       u64                     nr_wakeups_idle;
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
index 945ab13..3b27c3a 100644 (file)
@@ -1005,6 +1005,23 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #ifdef CONFIG_SMP
 
+/*
+ * Is this task likely cache-hot:
+ */
+static inline int
+task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
+{
+       s64 delta;
+
+       if (p->sched_class != &fair_sched_class)
+               return 0;
+
+       delta = now - p->se.exec_start;
+
+       return delta < (s64)sysctl_sched_migration_cost;
+}
+
+
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
        int old_cpu = task_cpu(p);
@@ -1022,6 +1039,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                p->se.sleep_start -= clock_offset;
        if (p->se.block_start)
                p->se.block_start -= clock_offset;
+       if (old_cpu != new_cpu) {
+               schedstat_inc(p, se.nr_migrations);
+               if (task_hot(p, old_rq->clock, NULL))
+                       schedstat_inc(p, se.nr_forced2_migrations);
+       }
 #endif
        p->se.vruntime -= old_cfsrq->min_vruntime -
                                         new_cfsrq->min_vruntime;
@@ -1394,8 +1416,13 @@ static int wake_idle(int cpu, struct task_struct *p)
                if (sd->flags & SD_WAKE_IDLE) {
                        cpus_and(tmp, sd->span, p->cpus_allowed);
                        for_each_cpu_mask(i, tmp) {
-                               if (idle_cpu(i))
+                               if (idle_cpu(i)) {
+                                       if (i != task_cpu(p)) {
+                                               schedstat_inc(p,
+                                                       se.nr_wakeups_idle);
+                                       }
                                        return i;
+                               }
                        }
                } else {
                        break;
@@ -1426,7 +1453,7 @@ static inline int wake_idle(int cpu, struct task_struct *p)
  */
 static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 {
-       int cpu, this_cpu, success = 0;
+       int cpu, orig_cpu, this_cpu, success = 0;
        unsigned long flags;
        long old_state;
        struct rq *rq;
@@ -1445,6 +1472,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
                goto out_running;
 
        cpu = task_cpu(p);
+       orig_cpu = cpu;
        this_cpu = smp_processor_id();
 
 #ifdef CONFIG_SMP
@@ -1488,6 +1516,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
                        unsigned long tl = this_load;
                        unsigned long tl_per_task;
 
+                       schedstat_inc(p, se.nr_wakeups_affine_attempts);
                        tl_per_task = cpu_avg_load_per_task(this_cpu);
 
                        /*
@@ -1507,6 +1536,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
                                 * there is no bad imbalance.
                                 */
                                schedstat_inc(this_sd, ttwu_move_affine);
+                               schedstat_inc(p, se.nr_wakeups_affine);
                                goto out_set_cpu;
                        }
                }
@@ -1518,6 +1548,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
                if (this_sd->flags & SD_WAKE_BALANCE) {
                        if (imbalance*this_load <= 100*load) {
                                schedstat_inc(this_sd, ttwu_move_balance);
+                               schedstat_inc(p, se.nr_wakeups_passive);
                                goto out_set_cpu;
                        }
                }
@@ -1543,6 +1574,15 @@ out_set_cpu:
 
 out_activate:
 #endif /* CONFIG_SMP */
+       schedstat_inc(p, se.nr_wakeups);
+       if (sync)
+               schedstat_inc(p, se.nr_wakeups_sync);
+       if (orig_cpu != cpu)
+               schedstat_inc(p, se.nr_wakeups_migrate);
+       if (cpu == this_cpu)
+               schedstat_inc(p, se.nr_wakeups_local);
+       else
+               schedstat_inc(p, se.nr_wakeups_remote);
        update_rq_clock(rq);
        activate_task(rq, p, 1);
        /*
@@ -2119,22 +2159,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
 }
 
 /*
- * Is this task likely cache-hot:
- */
-static inline int
-task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
-{
-       s64 delta;
-
-       if (p->sched_class != &fair_sched_class)
-               return 0;
-
-       delta = now - p->se.exec_start;
-
-       return delta < (s64)sysctl_sched_migration_cost;
-}
-
-/*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static
@@ -2148,12 +2172,16 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
         * 2) cannot be migrated to this CPU due to cpus_allowed, or
         * 3) are cache-hot on their current CPU.
         */
-       if (!cpu_isset(this_cpu, p->cpus_allowed))
+       if (!cpu_isset(this_cpu, p->cpus_allowed)) {
+               schedstat_inc(p, se.nr_failed_migrations_affine);
                return 0;
+       }
        *all_pinned = 0;
 
-       if (task_running(rq, p))
+       if (task_running(rq, p)) {
+               schedstat_inc(p, se.nr_failed_migrations_running);
                return 0;
+       }
 
        /*
         * Aggressive migration if:
@@ -2163,14 +2191,18 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 
        if (sd->nr_balance_failed > sd->cache_nice_tries) {
 #ifdef CONFIG_SCHEDSTATS
-               if (task_hot(p, rq->clock, sd))
+               if (task_hot(p, rq->clock, sd)) {
                        schedstat_inc(sd, lb_hot_gained[idle]);
+                       schedstat_inc(p, se.nr_forced_migrations);
+               }
 #endif
                return 1;
        }
 
-       if (task_hot(p, rq->clock, sd))
+       if (task_hot(p, rq->clock, sd)) {
+               schedstat_inc(p, se.nr_failed_migrations_hot);
                return 0;
+       }
        return 1;
 }
 
index 7558159..27e82cb 100644 (file)
@@ -260,6 +260,7 @@ __initcall(init_sched_debug_procfs);
 
 void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 {
+       unsigned long nr_switches;
        unsigned long flags;
        int num_threads = 1;
 
@@ -273,8 +274,12 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
        SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads);
        SEQ_printf(m,
                "---------------------------------------------------------\n");
+#define __P(F) \
+       SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)F)
 #define P(F) \
        SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)p->F)
+#define __PN(F) \
+       SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
 #define PN(F) \
        SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
 
@@ -282,6 +287,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
        PN(se.vruntime);
        PN(se.sum_exec_runtime);
 
+       nr_switches = p->nvcsw + p->nivcsw;
+
 #ifdef CONFIG_SCHEDSTATS
        PN(se.wait_start);
        PN(se.sleep_start);
@@ -292,14 +299,55 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
        PN(se.slice_max);
        PN(se.wait_max);
        P(sched_info.bkl_count);
+       P(se.nr_migrations);
+       P(se.nr_migrations_cold);
+       P(se.nr_failed_migrations_affine);
+       P(se.nr_failed_migrations_running);
+       P(se.nr_failed_migrations_hot);
+       P(se.nr_forced_migrations);
+       P(se.nr_forced2_migrations);
+       P(se.nr_wakeups);
+       P(se.nr_wakeups_sync);
+       P(se.nr_wakeups_migrate);
+       P(se.nr_wakeups_local);
+       P(se.nr_wakeups_remote);
+       P(se.nr_wakeups_affine);
+       P(se.nr_wakeups_affine_attempts);
+       P(se.nr_wakeups_passive);
+       P(se.nr_wakeups_idle);
+
+       {
+               u64 avg_atom, avg_per_cpu;
+
+               avg_atom = p->se.sum_exec_runtime;
+               if (nr_switches)
+                       do_div(avg_atom, nr_switches);
+               else
+                       avg_atom = -1LL;
+
+               avg_per_cpu = p->se.sum_exec_runtime;
+               if (p->se.nr_migrations)
+                       avg_per_cpu = div64_64(avg_per_cpu, p->se.nr_migrations);
+               else
+                       avg_per_cpu = -1LL;
+
+               __PN(avg_atom);
+               __PN(avg_per_cpu);
+       }
 #endif
+       __P(nr_switches);
        SEQ_printf(m, "%-35s:%21Ld\n",
-                  "nr_switches", (long long)(p->nvcsw + p->nivcsw));
+                  "nr_voluntary_switches", (long long)p->nvcsw);
+       SEQ_printf(m, "%-35s:%21Ld\n",
+                  "nr_involuntary_switches", (long long)p->nivcsw);
+
        P(se.load.weight);
        P(policy);
        P(prio);
-#undef P
 #undef PN
+#undef __PN
+#undef P
+#undef __P
 
        {
                u64 t0, t1;
@@ -314,13 +362,32 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 void proc_sched_set_task(struct task_struct *p)
 {
 #ifdef CONFIG_SCHEDSTATS
-       p->se.sleep_max                 = 0;
-       p->se.block_max                 = 0;
-       p->se.exec_max                  = 0;
-       p->se.slice_max                 = 0;
-       p->se.wait_max                  = 0;
-       p->sched_info.bkl_count         = 0;
+       p->se.wait_max                          = 0;
+       p->se.sleep_max                         = 0;
+       p->se.sum_sleep_runtime                 = 0;
+       p->se.block_max                         = 0;
+       p->se.exec_max                          = 0;
+       p->se.slice_max                         = 0;
+       p->se.nr_migrations                     = 0;
+       p->se.nr_migrations_cold                = 0;
+       p->se.nr_failed_migrations_affine       = 0;
+       p->se.nr_failed_migrations_running      = 0;
+       p->se.nr_failed_migrations_hot          = 0;
+       p->se.nr_forced_migrations              = 0;
+       p->se.nr_forced2_migrations             = 0;
+       p->se.nr_wakeups                        = 0;
+       p->se.nr_wakeups_sync                   = 0;
+       p->se.nr_wakeups_migrate                = 0;
+       p->se.nr_wakeups_local                  = 0;
+       p->se.nr_wakeups_remote                 = 0;
+       p->se.nr_wakeups_affine                 = 0;
+       p->se.nr_wakeups_affine_attempts        = 0;
+       p->se.nr_wakeups_passive                = 0;
+       p->se.nr_wakeups_idle                   = 0;
+       p->sched_info.bkl_count                 = 0;
 #endif
-       p->se.sum_exec_runtime          = 0;
-       p->se.prev_sum_exec_runtime     = 0;
+       p->se.sum_exec_runtime                  = 0;
+       p->se.prev_sum_exec_runtime             = 0;
+       p->nvcsw                                = 0;
+       p->nivcsw                               = 0;
 }