mm: correctly synchronize rss-counters at exit/exec
[linux-2.6.git] / kernel / watchdog.c
index 2feb287..df30ee0 100644 (file)
@@ -3,15 +3,14 @@
  *
  * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
  *
- * this code detects hard lockups: incidents in where on a CPU
- * the kernel does not respond to anything except NMI.
- *
- * Note: Most of this code is borrowed heavily from softlockup.c,
- * so thanks to Ingo for the initial implementation.
- * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
+ * Note: Most of this code is borrowed heavily from the original softlockup
+ * detector, so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
  * to those contributors as well.
  */
 
+#define pr_fmt(fmt) "NMI watchdog: " fmt
+
 #include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/nmi.h>
@@ -27,8 +26,8 @@
 #include <asm/irq_regs.h>
 #include <linux/perf_event.h>
 
-int watchdog_enabled;
-int __read_mostly softlockup_thresh = 60;
+int watchdog_enabled = 1;
+int __read_mostly watchdog_thresh = 10;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
@@ -43,21 +42,22 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
 
-static int __read_mostly did_panic;
-static int __initdata no_watchdog;
-
-
 /* boot commands */
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int hardlockup_panic;
+static int hardlockup_panic =
+                       CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
 
 static int __init hardlockup_panic_setup(char *str)
 {
        if (!strncmp(str, "panic", 5))
                hardlockup_panic = 1;
+       else if (!strncmp(str, "nopanic", 7))
+               hardlockup_panic = 0;
+       else if (!strncmp(str, "0", 1))
+               watchdog_enabled = 0;
        return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -76,7 +76,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
 
 static int __init nowatchdog_setup(char *str)
 {
-       no_watchdog = 1;
+       watchdog_enabled = 0;
        return 1;
 }
 __setup("nowatchdog", nowatchdog_setup);
@@ -84,12 +84,23 @@ __setup("nowatchdog", nowatchdog_setup);
 /* deprecated */
 static int __init nosoftlockup_setup(char *str)
 {
-       no_watchdog = 1;
+       watchdog_enabled = 0;
        return 1;
 }
 __setup("nosoftlockup", nosoftlockup_setup);
 /*  */
 
+/*
+ * Hard-lockup warnings should be triggered after just a few seconds. Soft-
+ * lockups can have false positives under extreme conditions, so we generally
+ * want a higher threshold for soft lockups than for hard lockups. We therefore
+ * couple the two thresholds with a fixed factor: the soft threshold is twice
+ * the hard threshold.
+ */
+static int get_softlockup_thresh(void)
+{
+       return watchdog_thresh * 2;
+}
 
 /*
  * Returns seconds, approximately.  We don't need nanosecond
@@ -104,12 +115,13 @@ static unsigned long get_timestamp(int this_cpu)
 static unsigned long get_sample_period(void)
 {
        /*
-        * convert softlockup_thresh from seconds to ns
-        * the divide by 5 is to give hrtimer 5 chances to
-        * increment before the hardlockup detector generates
-        * a warning
+        * convert the soft-lockup threshold (2 * watchdog_thresh) from
+        * seconds to ns; the divide by 5 gives the hrtimer several
+        * chances (two or three with the current relation between the
+        * soft and hard thresholds) to increment before the
+        * hardlockup detector generates a warning
         */
-       return softlockup_thresh / 5 * NSEC_PER_SEC;
+       return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
 }
 
 /* Commands for resetting the watchdog */
@@ -117,12 +129,12 @@ static void __touch_watchdog(void)
 {
        int this_cpu = smp_processor_id();
 
-       __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
+       __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
 }
 
 void touch_softlockup_watchdog(void)
 {
-       __raw_get_cpu_var(watchdog_touch_ts) = 0;
+       __this_cpu_write(watchdog_touch_ts, 0);
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
@@ -166,12 +178,12 @@ void touch_softlockup_watchdog_sync(void)
 /* watchdog detector functions */
 static int is_hardlockup(void)
 {
-       unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
+       unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
-       if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
+       if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
                return 1;
 
-       __get_cpu_var(hrtimer_interrupts_saved) = hrint;
+       __this_cpu_write(hrtimer_interrupts_saved, hrint);
        return 0;
 }
 #endif
@@ -181,25 +193,14 @@ static int is_softlockup(unsigned long touch_ts)
        unsigned long now = get_timestamp(smp_processor_id());
 
        /* Warn about unreasonable delays: */
-       if (time_after(now, touch_ts + softlockup_thresh))
+       if (time_after(now, touch_ts + get_softlockup_thresh()))
                return now - touch_ts;
 
        return 0;
 }
 
-static int
-watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
-{
-       did_panic = 1;
-
-       return NOTIFY_DONE;
-}
-
-static struct notifier_block panic_block = {
-       .notifier_call = watchdog_panic,
-};
-
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
+
 static struct perf_event_attr wd_hw_attr = {
        .type           = PERF_TYPE_HARDWARE,
        .config         = PERF_COUNT_HW_CPU_CYCLES,
@@ -209,15 +210,15 @@ static struct perf_event_attr wd_hw_attr = {
 };
 
 /* Callback function for perf event subsystem */
-static void watchdog_overflow_callback(struct perf_event *event, int nmi,
+static void watchdog_overflow_callback(struct perf_event *event,
                 struct perf_sample_data *data,
                 struct pt_regs *regs)
 {
        /* Ensure the watchdog never gets throttled */
        event->hw.interrupts = 0;
 
-       if (__get_cpu_var(watchdog_nmi_touch) == true) {
-               __get_cpu_var(watchdog_nmi_touch) = false;
+       if (__this_cpu_read(watchdog_nmi_touch) == true) {
+               __this_cpu_write(watchdog_nmi_touch, false);
                return;
        }
 
@@ -231,7 +232,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
                int this_cpu = smp_processor_id();
 
                /* only print hardlockups once */
-               if (__get_cpu_var(hard_watchdog_warn) == true)
+               if (__this_cpu_read(hard_watchdog_warn) == true)
                        return;
 
                if (hardlockup_panic)
@@ -239,16 +240,16 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
                else
                        WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
 
-               __get_cpu_var(hard_watchdog_warn) = true;
+               __this_cpu_write(hard_watchdog_warn, true);
                return;
        }
 
-       __get_cpu_var(hard_watchdog_warn) = false;
+       __this_cpu_write(hard_watchdog_warn, false);
        return;
 }
 static void watchdog_interrupt_count(void)
 {
-       __get_cpu_var(hrtimer_interrupts)++;
+       __this_cpu_inc(hrtimer_interrupts);
 }
 #else
 static inline void watchdog_interrupt_count(void) { return; }
@@ -257,7 +258,7 @@ static inline void watchdog_interrupt_count(void) { return; }
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
-       unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
+       unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
        struct pt_regs *regs = get_irq_regs();
        int duration;
 
@@ -265,18 +266,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
        watchdog_interrupt_count();
 
        /* kick the softlockup detector */
-       wake_up_process(__get_cpu_var(softlockup_watchdog));
+       wake_up_process(__this_cpu_read(softlockup_watchdog));
 
        /* .. and repeat */
        hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
 
        if (touch_ts == 0) {
-               if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
+               if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
                        /*
                         * If the time stamp was touched atomically
                         * make sure the scheduler tick is up to date.
                         */
-                       __get_cpu_var(softlockup_touch_sync) = false;
+                       __this_cpu_write(softlockup_touch_sync, false);
                        sched_clock_tick();
                }
                __touch_watchdog();
@@ -292,10 +293,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
        duration = is_softlockup(touch_ts);
        if (unlikely(duration)) {
                /* only warn once */
-               if (__get_cpu_var(soft_watchdog_warn) == true)
+               if (__this_cpu_read(soft_watchdog_warn) == true)
                        return HRTIMER_RESTART;
 
-               printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
+               printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
                        smp_processor_id(), duration,
                        current->comm, task_pid_nr(current));
                print_modules();
@@ -307,9 +308,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 
                if (softlockup_panic)
                        panic("softlockup: hung tasks");
-               __get_cpu_var(soft_watchdog_warn) = true;
+               __this_cpu_write(soft_watchdog_warn, true);
        } else
-               __get_cpu_var(soft_watchdog_warn) = false;
+               __this_cpu_write(soft_watchdog_warn, false);
 
        return HRTIMER_RESTART;
 }
@@ -320,11 +321,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
  */
 static int watchdog(void *unused)
 {
-       struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+       struct sched_param param = { .sched_priority = 0 };
        struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-       sched_setscheduler(current, SCHED_FIFO, &param);
-
        /* initialize timestamp */
        __touch_watchdog();
 
@@ -335,9 +334,11 @@ static int watchdog(void *unused)
 
        set_current_state(TASK_INTERRUPTIBLE);
        /*
-        * Run briefly once per second to reset the softlockup timestamp.
-        * If this gets delayed for more than 60 seconds then the
-        * debug-printout triggers in watchdog_timer_fn().
+        * Run briefly (kicked by the hrtimer callback function) once every
+        * get_sample_period() nanoseconds (4 seconds by default) to reset the
+        * softlockup timestamp. If this gets delayed for more than
+        * 2*watchdog_thresh seconds then the debug-printout triggers in
+        * watchdog_timer_fn().
         */
        while (!kthread_should_stop()) {
                __touch_watchdog();
@@ -348,8 +349,12 @@ static int watchdog(void *unused)
 
                set_current_state(TASK_INTERRUPTIBLE);
        }
+       /*
+        * Drop the policy/priority elevation during thread exit to avoid a
+        * scheduling latency spike.
+        */
        __set_current_state(TASK_RUNNING);
-
+       sched_setscheduler(current, SCHED_NORMAL, &param);
        return 0;
 }
 
@@ -368,17 +373,27 @@ static int watchdog_nmi_enable(int cpu)
        if (event != NULL)
                goto out_enable;
 
-       /* Try to register using hardware perf events */
        wd_attr = &wd_hw_attr;
-       wd_attr->sample_period = hw_nmi_get_sample_period();
-       event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
+       wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+
+       /* Try to register using hardware perf events */
+       event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
        if (!IS_ERR(event)) {
-               printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
+               pr_info("enabled, takes one hw-pmu counter.\n");
                goto out_save;
        }
 
-       printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
-       return -1;
+
+       /* vary the KERN level based on the returned errno */
+       if (PTR_ERR(event) == -EOPNOTSUPP)
+               pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
+       else if (PTR_ERR(event) == -ENOENT)
+               pr_warning("disabled (cpu%i): hardware events not enabled\n",
+                        cpu);
+       else
+               pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
+                       cpu, PTR_ERR(event));
+       return PTR_ERR(event);
 
        /* success path */
 out_save:
@@ -408,42 +423,48 @@ static void watchdog_nmi_disable(int cpu) { return; }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 /* prepare/enable/disable routines */
-static int watchdog_prepare_cpu(int cpu)
+static void watchdog_prepare_cpu(int cpu)
 {
        struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
 
        WARN_ON(per_cpu(softlockup_watchdog, cpu));
        hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        hrtimer->function = watchdog_timer_fn;
-
-       return 0;
 }
 
 static int watchdog_enable(int cpu)
 {
        struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
+       int err = 0;
 
        /* enable the perf event */
-       if (watchdog_nmi_enable(cpu) != 0)
-               return -1;
+       err = watchdog_nmi_enable(cpu);
+
+       /* Regardless of err above, fall through and start softlockup */
 
        /* create the watchdog thread */
        if (!p) {
-               p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
+               struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+               p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
                if (IS_ERR(p)) {
-                       printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
-                       return -1;
+                       pr_err("softlockup watchdog for %i failed\n", cpu);
+                       if (!err) {
+                               /* if hardlockup hasn't already set this */
+                               err = PTR_ERR(p);
+                               /* and disable the perf event */
+                               watchdog_nmi_disable(cpu);
+                       }
+                       goto out;
                }
+               sched_setscheduler(p, SCHED_FIFO, &param);
                kthread_bind(p, cpu);
                per_cpu(watchdog_touch_ts, cpu) = 0;
                per_cpu(softlockup_watchdog, cpu) = p;
                wake_up_process(p);
        }
 
-       /* if any cpu succeeds, watchdog is considered enabled for the system */
-       watchdog_enabled = 1;
-
-       return 0;
+out:
+       return err;
 }
 
 static void watchdog_disable(int cpu)
@@ -467,16 +488,22 @@ static void watchdog_disable(int cpu)
        }
 }
 
+/* sysctl functions */
+#ifdef CONFIG_SYSCTL
 static void watchdog_enable_all_cpus(void)
 {
        int cpu;
-       int result = 0;
+
+       watchdog_enabled = 0;
 
        for_each_online_cpu(cpu)
-               result += watchdog_enable(cpu);
+               if (!watchdog_enable(cpu))
+                       /* if any cpu succeeds, watchdog is considered
+                          enabled for the system */
+                       watchdog_enabled = 1;
 
-       if (result)
-               printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
+       if (!watchdog_enabled)
+               pr_err("failed to be enabled on some cpus\n");
 
 }
 
@@ -492,29 +519,26 @@ static void watchdog_disable_all_cpus(void)
 }
 
 
-/* sysctl functions */
-#ifdef CONFIG_SYSCTL
 /*
- * proc handler for /proc/sys/kernel/nmi_watchdog
+ * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
  */
 
-int proc_dowatchdog_enabled(struct ctl_table *table, int write,
-                    void __user *buffer, size_t *length, loff_t *ppos)
+int proc_dowatchdog(struct ctl_table *table, int write,
+                   void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       proc_dointvec(table, write, buffer, length, ppos);
+       int ret;
 
-       if (watchdog_enabled)
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (ret || !write)
+               goto out;
+
+       if (watchdog_enabled && watchdog_thresh)
                watchdog_enable_all_cpus();
        else
                watchdog_disable_all_cpus();
-       return 0;
-}
 
-int proc_dowatchdog_thresh(struct ctl_table *table, int write,
-                            void __user *buffer,
-                            size_t *lenp, loff_t *ppos)
-{
-       return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+out:
+       return ret;
 }
 #endif /* CONFIG_SYSCTL */
 
@@ -530,13 +554,12 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
-               if (watchdog_prepare_cpu(hotcpu))
-                       return NOTIFY_BAD;
+               watchdog_prepare_cpu(hotcpu);
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
-               if (watchdog_enable(hotcpu))
-                       return NOTIFY_BAD;
+               if (watchdog_enabled)
+                       watchdog_enable(hotcpu);
                break;
 #ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
@@ -549,6 +572,12 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
                break;
 #endif /* CONFIG_HOTPLUG_CPU */
        }
+
+       /*
+        * hardlockup and softlockup are not important enough
+        * to block cpu bring up.  Just always succeed and
+        * rely on printk output to flag problems.
+        */
        return NOTIFY_OK;
 }
 
@@ -556,22 +585,16 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
 };
 
-static int __init spawn_watchdog_task(void)
+void __init lockup_detector_init(void)
 {
        void *cpu = (void *)(long)smp_processor_id();
        int err;
 
-       if (no_watchdog)
-               return 0;
-
        err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-       WARN_ON(err == NOTIFY_BAD);
+       WARN_ON(notifier_to_errno(err));
 
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
 
-       atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
-
-       return 0;
+       return;
 }
-early_initcall(spawn_watchdog_task);