kernel: watchdog: query CPU soft/hard lockup status
Varun Wadekar [Wed, 4 Dec 2013 12:02:51 +0000 (17:02 +0530)]
Allow system drivers to query whether any CPU has entered a soft or hard
lockup state. This lets the FIQ watchdog timer stop kicking the
watchdog so that we get call stacks for all online CPUs.

Bug 1326082

Change-Id: Id6dbd4074430a13da53cf791a6c877987e9fd1a8
Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
Reviewed-on: http://git-master/r/338250

include/linux/nmi.h
kernel/watchdog.c

index c8f8aa0..a461766 100644 (file)
@@ -9,7 +9,7 @@
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
- * 
+ *
  * If the architecture supports the NMI watchdog, touch_nmi_watchdog()
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
@@ -54,6 +54,7 @@ extern int watchdog_thresh;
 struct ctl_table;
 extern int proc_dowatchdog(struct ctl_table *, int ,
                           void __user *, size_t *, loff_t *);
+extern int watchdog_get_lockup_state(void);
 #endif
 
 #endif
index e092e5a..274e737 100644 (file)
@@ -53,6 +53,9 @@ static cpumask_t __read_mostly watchdog_cpus;
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
 
+static __read_mostly int soft_lockup_detected; /* set to 1 on soft lockup */
+static __read_mostly int hard_lockup_detected; /* set to 1 on hard lockup */
+
 /* boot commands */
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
@@ -101,6 +104,11 @@ static int __init nosoftlockup_setup(char *str)
 __setup("nosoftlockup", nosoftlockup_setup);
 /*  */
 
+int watchdog_get_lockup_state(void) /* bit 8: soft lockup, bit 0: hard */
+{
+       return (soft_lockup_detected << 8) | hard_lockup_detected;
+}
+
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
  * lockups can have false positives under extreme conditions. So we generally
@@ -253,6 +261,8 @@ static void watchdog_check_hardlockup_other_cpu(void)
                if (per_cpu(hard_watchdog_warn, next_cpu) == true)
                        return;
 
+               hard_lockup_detected = 1;
+
                if (hardlockup_panic)
                        panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu);
                else
@@ -310,6 +320,8 @@ static void watchdog_overflow_callback(struct perf_event *event,
        if (is_hardlockup()) {
                int this_cpu = smp_processor_id();
 
+               hard_lockup_detected = 1;
+
                /* only print hardlockups once */
                if (__this_cpu_read(hard_watchdog_warn) == true)
                        return;
@@ -391,6 +403,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
                if (__this_cpu_read(soft_watchdog_warn) == true)
                        return HRTIMER_RESTART;
 
+               soft_lockup_detected = 1;
+
                printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
                        smp_processor_id(), duration,
                        current->comm, task_pid_nr(current));