[SCSI] add global timeout to the scsi mid-layer
James Bottomley [Sat, 6 Aug 2005 02:45:40 +0000 (21:45 -0500)]
There are certain rogue devices (and the aic7xxx driver) that return
BUSY or QUEUE_FULL forever.  This code applies a global timeout (the
number of allowed retries times the per-command timeout) to a given
command.  If it is exceeded, the command is completed regardless of its
state.

The patch also removes the unused fields in the command: timeout and
timeout_total.

This solves the problem of detecting an endless loop in the mid-layer
because of BUSY/QUEUE_FULL bouncing, but will not recover the device.
In the aic7xxx case, the driver can be recovered by sending a bus reset,
so possibly this should be tied into the error handler?

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

drivers/scsi/advansys.c
drivers/scsi/scsi.c
include/scsi/scsi_cmnd.h

index 0fb9336..37ec541 100644 (file)
@@ -9200,8 +9200,8 @@ asc_prt_scsi_cmnd(struct scsi_cmnd *s)
         (unsigned) s->serial_number, s->retries, s->allowed);
 
     printk(
-" timeout_per_command %d, timeout_total %d, timeout %d\n",
-        s->timeout_per_command, s->timeout_total, s->timeout);
+" timeout_per_command %d\n",
+        s->timeout_per_command);
 
     printk(
 " scsi_done 0x%lx, done 0x%lx, host_scribble 0x%lx, result 0x%x\n",
index d1aa95d..4befbc2 100644 (file)
@@ -268,6 +268,7 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, int gfp_mask)
        } else
                put_device(&dev->sdev_gendev);
 
+       cmd->jiffies_at_alloc = jiffies;
        return cmd;
 }                              
 EXPORT_SYMBOL(scsi_get_command);
@@ -798,9 +799,23 @@ static void scsi_softirq(struct softirq_action *h)
        while (!list_empty(&local_q)) {
                struct scsi_cmnd *cmd = list_entry(local_q.next,
                                                   struct scsi_cmnd, eh_entry);
+               /* The longest time any command should be outstanding is the
+                * per command timeout multiplied by the number of retries.
+                *
+                * For a typical command, this is 2.5 minutes */
+               unsigned long wait_for 
+                       = cmd->allowed * cmd->timeout_per_command;
                list_del_init(&cmd->eh_entry);
 
                disposition = scsi_decide_disposition(cmd);
+               if (disposition != SUCCESS &&
+                   time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
+                       dev_printk(KERN_ERR, &cmd->device->sdev_gendev, 
+                                  "timing out command, waited %ds\n",
+                                  wait_for/HZ);
+                       disposition = SUCCESS;
+               }
+                       
                scsi_log_completion(cmd, disposition);
                switch (disposition) {
                case SUCCESS:
index 9957f16..bed4b7c 100644 (file)
@@ -51,12 +51,16 @@ struct scsi_cmnd {
         * printk's to use ->pid, so that we can kill this field.
         */
        unsigned long serial_number;
+       /*
+        * This is set to jiffies as it was when the command was first
+        * allocated.  It is used to time how long the command has
+        * been outstanding
+        */
+       unsigned long jiffies_at_alloc;
 
        int retries;
        int allowed;
        int timeout_per_command;
-       int timeout_total;
-       int timeout;
 
        unsigned char cmd_len;
        unsigned char old_cmd_len;