nv-tegra.nvidia Code Review - linux-3.10.git/blobdiff - kernel/watchdog.c
cgroup: fix to break the while loop in cgroup_attach_task() correctly
[linux-3.10.git] / kernel / watchdog.c
index 4b1dfba70f7cf8ae7397623656a9b695028f702a..e092e5a6cdd753a0368215204b33daec16b7c2ba 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
+#include <linux/smpboot.h>
+#include <linux/sched/rt.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
 
 int watchdog_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+static int __read_mostly watchdog_disabled; /* 0: threads running, 1: parked by watchdog_disable_all_cpus(), -ENODEV: thread registration failed */
+static u64 __read_mostly sample_period; /* hrtimer period in ns; written by set_sample_period() */
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); /* incremented by watchdog_interrupt_count(); now defined regardless of CONFIG_HARDLOCKUP_DETECTOR */
+static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); /* hrtimer_interrupts value last recorded by watchdog() */
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static cpumask_t __read_mostly watchdog_cpus; /* cpus taking part in other-cpu checking; set/cleared in watchdog_nmi_enable()/watchdog_nmi_disable() */
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
 
@@ -108,12 +118,12 @@ static int get_softlockup_thresh(void)
  * resolution, and we don't need to waste time with a big divide when
  * 2^30ns == 1.074s.
  */
-static unsigned long get_timestamp(int this_cpu)
+static unsigned long get_timestamp(void)
 {
-       return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
+       return local_clock() >> 30LL;  /* 2^30 ~= 10^9, so the shifted ns value is roughly seconds */
 }
 
-static unsigned long get_sample_period(void)
+static void set_sample_period(void)
 {
        /*
         * convert watchdog_thresh from seconds to ns
@@ -122,15 +132,13 @@ static unsigned long get_sample_period(void)
         * and hard thresholds) to increment before the
         * hardlockup detector generates a warning
         */
-       return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
+       sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
 }
 
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
-       int this_cpu = smp_processor_id();
-
-       __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
+       __this_cpu_write(watchdog_touch_ts, get_timestamp()); /* record "now" as this cpu's last-touch time; no cpu id needed any more */
 }
 
 void touch_softlockup_watchdog(void)
@@ -175,7 +183,7 @@ void touch_softlockup_watchdog_sync(void)
        __raw_get_cpu_var(watchdog_touch_ts) = 0;
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
 /* watchdog detector functions */
 static int is_hardlockup(void)
 {
@@ -189,9 +197,79 @@ static int is_hardlockup(void)
 }
 #endif
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static unsigned int watchdog_next_cpu(unsigned int cpu)
+{ /* Returns the cpu after @cpu in watchdog_cpus (wrapping to the first), or a value >= nr_cpu_ids if there is no cpu other than @cpu. */
+       cpumask_t cpus = watchdog_cpus; /* local copy, so both lookups below see the same mask */
+       unsigned int next_cpu;
+
+       next_cpu = cpumask_next(cpu, &cpus);
+       if (next_cpu >= nr_cpu_ids) /* nothing after @cpu in the mask: wrap around */
+               next_cpu = cpumask_first(&cpus);
+
+       if (next_cpu == cpu) /* wrapped back to @cpu itself: no other cpu to report */
+               return nr_cpu_ids;
+
+       return next_cpu;
+}
+
+static int is_hardlockup_other_cpu(unsigned int cpu)
+{ /* Returns 1 if @cpu's hrtimer_interrupts equals the value saved by the previous call for @cpu; otherwise saves the new value and returns 0. */
+       unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+
+       if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) /* count has not moved since the last check */
+               return 1;
+
+       per_cpu(hrtimer_interrupts_saved, cpu) = hrint; /* remember the count for the next comparison */
+       return 0;
+}
+
+static void watchdog_check_hardlockup_other_cpu(void)
+{ /* Called from watchdog_timer_fn(): checks whether the next cpu in watchdog_cpus has stopped counting hrtimer interrupts. */
+       unsigned int next_cpu;
+
+       /*
+        * Test for hardlockups every 3 samples.  The sample period is
+        * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
+        * watchdog_thresh (over by 20%).
+        */
+       if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
+               return;
+
+       /* check for a hardlockup on the next cpu */
+       next_cpu = watchdog_next_cpu(smp_processor_id());
+       if (next_cpu >= nr_cpu_ids) /* no other cpu to check */
+               return;
+
+       smp_rmb(); /* NOTE(review): presumably pairs with the smp_wmb() between the watchdog_nmi_touch store and the watchdog_cpus update in watchdog_nmi_enable()/watchdog_nmi_disable() - confirm */
+
+       if (per_cpu(watchdog_nmi_touch, next_cpu) == true) { /* target was touched: consume the flag and skip this round */
+               per_cpu(watchdog_nmi_touch, next_cpu) = false;
+               return;
+       }
+
+       if (is_hardlockup_other_cpu(next_cpu)) {
+               /* only warn once */
+               if (per_cpu(hard_watchdog_warn, next_cpu) == true)
+                       return;
+
+               if (hardlockup_panic)
+                       panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+               else
+                       WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu);
+
+               per_cpu(hard_watchdog_warn, next_cpu) = true; /* suppresses repeat warnings until the else branch below clears it */
+       } else {
+               per_cpu(hard_watchdog_warn, next_cpu) = false; /* counter moved again: re-arm the warning */
+       }
+}
+#else
+static inline void watchdog_check_hardlockup_other_cpu(void) { return; }
+#endif
+
 static int is_softlockup(unsigned long touch_ts)
 {
-       unsigned long now = get_timestamp(smp_processor_id());
+       unsigned long now = get_timestamp();
 
        /* Warn about unreasonable delays: */
        if (time_after(now, touch_ts + get_softlockup_thresh()))
@@ -200,7 +278,7 @@ static int is_softlockup(unsigned long touch_ts)
        return 0;
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
 
 static struct perf_event_attr wd_hw_attr = {
        .type           = PERF_TYPE_HARDWARE,
@@ -248,13 +326,15 @@ static void watchdog_overflow_callback(struct perf_event *event,
        __this_cpu_write(hard_watchdog_warn, false);
        return;
 }
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
+
 static void watchdog_interrupt_count(void)
 {
        __this_cpu_inc(hrtimer_interrupts); /* per-cpu tick count; read by is_hardlockup_other_cpu() and watchdog_should_run() */
 }
-#else
-static inline void watchdog_interrupt_count(void) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
+static int watchdog_nmi_enable(unsigned int cpu);
+static void watchdog_nmi_disable(unsigned int cpu);
 
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
@@ -266,11 +346,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
        /* kick the hardlockup detector */
        watchdog_interrupt_count();
 
+       /* test for hardlockups on the next cpu */
+       watchdog_check_hardlockup_other_cpu();
+
        /* kick the softlockup detector */
        wake_up_process(__this_cpu_read(softlockup_watchdog));
 
        /* .. and repeat */
-       hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
+       hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 
        if (touch_ts == 0) {
                if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
@@ -327,51 +410,70 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
        return HRTIMER_RESTART;
 }
 
+static void watchdog_set_prio(unsigned int policy, unsigned int prio)
+{
+       struct sched_param param = { .sched_priority = prio };
 
-/*
- * The watchdog thread - touches the timestamp.
- */
-static int watchdog(void *unused)
+       sched_setscheduler(current, policy, &param);
+}
+
+static void watchdog_enable(unsigned int cpu)
 {
-       struct sched_param param = { .sched_priority = 0 };
        struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-       /* initialize timestamp */
-       __touch_watchdog();
-
        /* kick off the timer for the hardlockup detector */
+       hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       hrtimer->function = watchdog_timer_fn;
+
+       if (!watchdog_enabled) {
+               kthread_park(current);
+               return;
+       }
+
+       /* Enable the perf event */
+       watchdog_nmi_enable(cpu);
+
        /* done here because hrtimer_start can only pin to smp_processor_id() */
-       hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
+       hrtimer_start(hrtimer, ns_to_ktime(sample_period),
                      HRTIMER_MODE_REL_PINNED);
 
-       set_current_state(TASK_INTERRUPTIBLE);
-       /*
-        * Run briefly (kicked by the hrtimer callback function) once every
-        * get_sample_period() seconds (4 seconds by default) to reset the
-        * softlockup timestamp. If this gets delayed for more than
-        * 2*watchdog_thresh seconds then the debug-printout triggers in
-        * watchdog_timer_fn().
-        */
-       while (!kthread_should_stop()) {
-               __touch_watchdog();
-               schedule();
+       /* initialize timestamp */
+       watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
+       __touch_watchdog();
+}
 
-               if (kthread_should_stop())
-                       break;
+static void watchdog_disable(unsigned int cpu)
+{
+       struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-               set_current_state(TASK_INTERRUPTIBLE);
-       }
-       /*
-        * Drop the policy/priority elevation during thread exit to avoid a
-        * scheduling latency spike.
-        */
-       __set_current_state(TASK_RUNNING);
-       sched_setscheduler(current, SCHED_NORMAL, &param);
-       return 0;
+       watchdog_set_prio(SCHED_NORMAL, 0);
+       hrtimer_cancel(hrtimer);
+       /* disable the perf event */
+       watchdog_nmi_disable(cpu);
 }
 
+static int watchdog_should_run(unsigned int cpu)
+{ /* Non-zero when this cpu's hrtimer_interrupts differs from the count watchdog() last recorded; @cpu is not used. */
+       return __this_cpu_read(hrtimer_interrupts) !=
+               __this_cpu_read(soft_lockup_hrtimer_cnt);
+}
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/*
+ * The watchdog thread function - touches the timestamp.
+ *
+ * It only runs once every sample_period (kept in ns; 4 seconds by
+ * default) to reset the softlockup timestamp. If this gets delayed
+ * for more than 2*watchdog_thresh seconds then the debug-printout
+ * triggers in watchdog_timer_fn().
+ */
+static void watchdog(unsigned int cpu)
+{
+       __this_cpu_write(soft_lockup_hrtimer_cnt,
+                        __this_cpu_read(hrtimer_interrupts)); /* makes watchdog_should_run() return 0 until the count changes again */
+       __touch_watchdog();
+}
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
 /*
  * People like the simple clean cpu node info on boot.
  * Reduce the watchdog noise by only printing messages
@@ -379,7 +481,7 @@ static int watchdog(void *unused)
  */
 static unsigned long cpu0_err;
 
-static int watchdog_nmi_enable(int cpu)
+static int watchdog_nmi_enable(unsigned int cpu)
 {
        struct perf_event_attr *wd_attr;
        struct perf_event *event = per_cpu(watchdog_ev, cpu);
@@ -433,7 +535,7 @@ out:
        return 0;
 }
 
-static void watchdog_nmi_disable(int cpu)
+static void watchdog_nmi_disable(unsigned int cpu)
 {
        struct perf_event *event = per_cpu(watchdog_ev, cpu);
 
@@ -447,107 +549,70 @@ static void watchdog_nmi_disable(int cpu)
        return;
 }
 #else
-static int watchdog_nmi_enable(int cpu) { return 0; }
-static void watchdog_nmi_disable(int cpu) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
-/* prepare/enable/disable routines */
-static void watchdog_prepare_cpu(int cpu)
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
+static int watchdog_nmi_enable(unsigned int cpu)
 {
-       struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
-
-       WARN_ON(per_cpu(softlockup_watchdog, cpu));
-       hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-       hrtimer->function = watchdog_timer_fn;
-}
-
-static int watchdog_enable(int cpu)
-{
-       struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
-       int err = 0;
-
-       /* enable the perf event */
-       err = watchdog_nmi_enable(cpu);
-
-       /* Regardless of err above, fall through and start softlockup */
-
-       /* create the watchdog thread */
-       if (!p) {
-               struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-               p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
-               if (IS_ERR(p)) {
-                       pr_err("softlockup watchdog for %i failed\n", cpu);
-                       if (!err) {
-                               /* if hardlockup hasn't already set this */
-                               err = PTR_ERR(p);
-                               /* and disable the perf event */
-                               watchdog_nmi_disable(cpu);
-                       }
-                       goto out;
-               }
-               sched_setscheduler(p, SCHED_FIFO, &param);
-               kthread_bind(p, cpu);
-               per_cpu(watchdog_touch_ts, cpu) = 0;
-               per_cpu(softlockup_watchdog, cpu) = p;
-               wake_up_process(p);
-       }
-
-out:
-       return err;
+       /*
+        * The new cpu will be marked online before the first hrtimer interrupt
+        * runs on it.  If another cpu tests for a hardlockup on the new cpu
+        * before it has run its first hrtimer, it will get a false positive.
+        * Touch the watchdog on the new cpu to delay the first check for at
+        * least 3 sampling periods to guarantee one hrtimer has run on the new
+        * cpu.
+        */
+       per_cpu(watchdog_nmi_touch, cpu) = true;
+       smp_wmb();
+       cpumask_set_cpu(cpu, &watchdog_cpus);
+       return 0;
 }
 
-static void watchdog_disable(int cpu)
+static void watchdog_nmi_disable(unsigned int cpu)
 {
-       struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
-       struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
+       unsigned int next_cpu = watchdog_next_cpu(cpu);
 
        /*
-        * cancel the timer first to stop incrementing the stats
-        * and waking up the kthread
+        * Offlining this cpu will cause the cpu before this one to start
+        * checking the one after this one.  If this cpu just finished checking
+        * the next cpu and updating hrtimer_interrupts_saved, and then the
+        * previous cpu checks it within one sample period, it will trigger a
+        * false positive.  Touch the watchdog on the next cpu to prevent it.
         */
-       hrtimer_cancel(hrtimer);
-
-       /* disable the perf event */
-       watchdog_nmi_disable(cpu);
-
-       /* stop the watchdog thread */
-       if (p) {
-               per_cpu(softlockup_watchdog, cpu) = NULL;
-               kthread_stop(p);
-       }
+       if (next_cpu < nr_cpu_ids)
+               per_cpu(watchdog_nmi_touch, next_cpu) = true;
+       smp_wmb();
+       cpumask_clear_cpu(cpu, &watchdog_cpus);
 }
+#else
+static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
+static void watchdog_nmi_disable(unsigned int cpu) { return; }
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
 
+/* prepare/enable/disable routines */
 /* sysctl functions */
 #ifdef CONFIG_SYSCTL
 static void watchdog_enable_all_cpus(void)
 {
-       int cpu;
-
-       watchdog_enabled = 0;
-
-       for_each_online_cpu(cpu)
-               if (!watchdog_enable(cpu))
-                       /* if any cpu succeeds, watchdog is considered
-                          enabled for the system */
-                       watchdog_enabled = 1;
-
-       if (!watchdog_enabled)
-               pr_err("failed to be enabled on some cpus\n");
+       unsigned int cpu;
 
+       if (watchdog_disabled) { /* act only on a disabled -> enabled transition; also true for the -ENODEV value */
+               watchdog_disabled = 0;
+               for_each_online_cpu(cpu)
+                       kthread_unpark(per_cpu(softlockup_watchdog, cpu)); /* NOTE(review): presumably leads to the .unpark callback (watchdog_enable) running on that cpu - confirm against smpboot */
+       }
 }
 
 static void watchdog_disable_all_cpus(void)
 {
-       int cpu;
-
-       for_each_online_cpu(cpu)
-               watchdog_disable(cpu);
+       unsigned int cpu;
 
-       /* if all watchdogs are disabled, then they are disabled for the system */
-       watchdog_enabled = 0;
+       if (!watchdog_disabled) { /* act only on an enabled -> disabled transition */
+               watchdog_disabled = 1;
+               for_each_online_cpu(cpu)
+                       kthread_park(per_cpu(softlockup_watchdog, cpu)); /* NOTE(review): presumably leads to the .park callback (watchdog_disable) running on that cpu - confirm against smpboot */
+       }
 }
 
-
 /*
  * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
  */
@@ -557,73 +622,43 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 {
        int ret;
 
+       if (watchdog_disabled < 0)
+               return -ENODEV;
+
        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (ret || !write)
-               goto out;
+               return ret;
 
+       set_sample_period();
+       /*
+        * Watchdog threads shouldn't be enabled if they are
+        * disabled. The 'watchdog_disabled' variable check in
+        * watchdog_*_all_cpus() function takes care of this.
+        */
        if (watchdog_enabled && watchdog_thresh)
                watchdog_enable_all_cpus();
        else
                watchdog_disable_all_cpus();
 
-out:
        return ret;
 }
 #endif /* CONFIG_SYSCTL */
 
-
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-       int hotcpu = (unsigned long)hcpu;
-
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               watchdog_prepare_cpu(hotcpu);
-               break;
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               if (watchdog_enabled)
-                       watchdog_enable(hotcpu);
-               break;
-#ifdef CONFIG_HOTPLUG_CPU
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-               watchdog_disable(hotcpu);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               watchdog_disable(hotcpu);
-               break;
-#endif /* CONFIG_HOTPLUG_CPU */
-       }
-
-       /*
-        * hardlockup and softlockup are not important enough
-        * to block cpu bring up.  Just always succeed and
-        * rely on printk output to flag problems.
-        */
-       return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
-       .notifier_call = cpu_callback
+static struct smp_hotplug_thread watchdog_threads = { /* registered by lockup_detector_init(); replaces the removed cpu notifier */
+       .store                  = &softlockup_watchdog, /* per-cpu task pointer, read by watchdog_timer_fn() and the *_all_cpus() helpers */
+       .thread_should_run      = watchdog_should_run,
+       .thread_fn              = watchdog,
+       .thread_comm            = "watchdog/%u",
+       .setup                  = watchdog_enable,
+       .park                   = watchdog_disable,
+       .unpark                 = watchdog_enable, /* same routine as .setup */
 };
 
 void __init lockup_detector_init(void)
 {
-       void *cpu = (void *)(long)smp_processor_id();
-       int err;
-
-       err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-       WARN_ON(notifier_to_errno(err));
-
-       cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-       register_cpu_notifier(&cpu_nfb);
-
-       return;
+       set_sample_period(); /* sample_period is read by watchdog_enable() (the .setup callback), so set it before registering */
+       if (smpboot_register_percpu_thread(&watchdog_threads)) { /* non-zero return is treated as failure */
+               pr_err("Failed to create watchdog threads, disabled\n");
+               watchdog_disabled = -ENODEV; /* negative value makes proc_dowatchdog() return -ENODEV */
+       }
 }