drivers: power: Add watchdog timer to catch drivers which lockup during suspend.
San Mehat [Thu, 13 Aug 2009 16:40:42 +0000 (09:40 -0700)]
Rather than hard-lock the kernel, we now BUG() when a driver takes
more than 3 seconds to suspend. If the underlying platform supports
panic dumps, the data can then be collected for debugging.

Signed-off-by: San Mehat <san@google.com>

drivers/base/power/main.c

index a854591..5e57646 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/sched.h>
 #include <linux/async.h>
 #include <linux/suspend.h>
+#include <linux/timer.h>
 
 #include "../base.h"
 #include "power.h"
@@ -49,6 +50,9 @@ LIST_HEAD(dpm_noirq_list);
 static DEFINE_MUTEX(dpm_list_mtx);
 static pm_message_t pm_transition;
 
+static void dpm_drv_timeout(unsigned long data);
+static DEFINE_TIMER(dpm_drv_wd, dpm_drv_timeout, 0, 0);
+
 static int async_error;
 
 /**
@@ -592,6 +596,45 @@ static bool is_async(struct device *dev)
 }
 
 /**
+ *     dpm_drv_timeout - Driver suspend / resume watchdog handler
+ *     @data: struct device which timed out
+ *
+ *     Called when a driver has timed out suspending or resuming.
+ *     There's not much we can do here to recover so
+ *     BUG() out for a crash-dump
+ *
+ */
+static void dpm_drv_timeout(unsigned long data)
+{
+       struct device *dev = (struct device *) data; /* see dpm_drv_wdset() */
+
+       printk(KERN_EMERG "**** DPM device timeout: %s (%s)\n", dev_name(dev),
+              (dev->driver ? dev->driver->name : "no driver"));
+       BUG(); /* deliberate crash so a panic dump can be collected */
+}
+
+/**
+ *     dpm_drv_wdset - Sets up driver suspend/resume watchdog timer.
+ *     @dev: struct device which we're guarding.
+ *
+ */
+static void dpm_drv_wdset(struct device *dev)
+{
+       dpm_drv_wd.data = (unsigned long) dev; /* arg for dpm_drv_timeout() */
+       mod_timer(&dpm_drv_wd, jiffies + (HZ * 3)); /* 3 second timeout */
+}
+
+/**
+ *     dpm_drv_wdclr - clears driver suspend/resume watchdog timer.
+ *     @dev: struct device which we're no longer guarding (currently unused).
+ *
+ */
+static void dpm_drv_wdclr(struct device *dev)
+{
+       del_timer_sync(&dpm_drv_wd); /* @dev is unused; one global timer */
+}
+
+/**
  * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
  * @state: PM transition of the system being carried out.
  *
@@ -962,7 +1005,9 @@ int dpm_suspend(pm_message_t state)
                get_device(dev);
                mutex_unlock(&dpm_list_mtx);
 
+               dpm_drv_wdset(dev);
                error = device_suspend(dev);
+               dpm_drv_wdclr(dev);
 
                mutex_lock(&dpm_list_mtx);
                if (error) {