[SCSI] fix oops on usb storage device disconnect
James Bottomley [Sun, 18 Sep 2005 20:05:20 +0000 (15:05 -0500)]
We fix the oops by enforcing the host state model.  There have also
been two extra states added: SHOST_CANCEL_RECOVERY and
SHOST_DEL_RECOVERY so we can take the model through host removal while
the recovery thread is active.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

drivers/scsi/hosts.c
drivers/scsi/scsi.c
drivers/scsi/scsi_error.c
drivers/scsi/scsi_ioctl.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/sg.c
include/scsi/scsi_host.h

index 85503fa..f2a72d3 100644 (file)
@@ -98,6 +98,7 @@ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state)
                switch (oldstate) {
                case SHOST_CREATED:
                case SHOST_RUNNING:
+               case SHOST_CANCEL_RECOVERY:
                        break;
                default:
                        goto illegal;
@@ -107,12 +108,31 @@ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state)
        case SHOST_DEL:
                switch (oldstate) {
                case SHOST_CANCEL:
+               case SHOST_DEL_RECOVERY:
                        break;
                default:
                        goto illegal;
                }
                break;
 
+       case SHOST_CANCEL_RECOVERY:
+               switch (oldstate) {
+               case SHOST_CANCEL:
+               case SHOST_RECOVERY:
+                       break;
+               default:
+                       goto illegal;
+               }
+               break;
+
+       case SHOST_DEL_RECOVERY:
+               switch (oldstate) {
+               case SHOST_CANCEL_RECOVERY:
+                       break;
+               default:
+                       goto illegal;
+               }
+               break;
        }
        shost->shost_state = state;
        return 0;
@@ -134,13 +154,24 @@ EXPORT_SYMBOL(scsi_host_set_state);
  **/
 void scsi_remove_host(struct Scsi_Host *shost)
 {
+       unsigned long flags;
        down(&shost->scan_mutex);
-       scsi_host_set_state(shost, SHOST_CANCEL);
+       spin_lock_irqsave(shost->host_lock, flags);
+       if (scsi_host_set_state(shost, SHOST_CANCEL))
+               if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) {
+                       spin_unlock_irqrestore(shost->host_lock, flags);
+                       up(&shost->scan_mutex);
+                       return;
+               }
+       spin_unlock_irqrestore(shost->host_lock, flags);
        up(&shost->scan_mutex);
        scsi_forget_host(shost);
        scsi_proc_host_rm(shost);
 
-       scsi_host_set_state(shost, SHOST_DEL);
+       spin_lock_irqsave(shost->host_lock, flags);
+       if (scsi_host_set_state(shost, SHOST_DEL))
+               BUG_ON(scsi_host_set_state(shost, SHOST_DEL_RECOVERY));
+       spin_unlock_irqrestore(shost->host_lock, flags);
 
        transport_unregister_device(&shost->shost_gendev);
        class_device_unregister(&shost->shost_classdev);
index a780546..1f0ebab 100644 (file)
@@ -1265,9 +1265,8 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery)
                list_for_each_safe(lh, lh_sf, &active_list) {
                        scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
                        list_del_init(lh);
-                       if (recovery) {
-                               scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD);
-                       } else {
+                       if (recovery &&
+                           !scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD)) {
                                scmd->result = (DID_ABORT << 16);
                                scsi_finish_command(scmd);
                        }
index 895c945..af589fa 100644 (file)
@@ -68,19 +68,24 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
 {
        struct Scsi_Host *shost = scmd->device->host;
        unsigned long flags;
+       int ret = 0;
 
        if (shost->eh_wait == NULL)
                return 0;
 
        spin_lock_irqsave(shost->host_lock, flags);
+       if (scsi_host_set_state(shost, SHOST_RECOVERY))
+               if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
+                       goto out_unlock;
 
+       ret = 1;
        scmd->eh_eflags |= eh_flag;
        list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
-       scsi_host_set_state(shost, SHOST_RECOVERY);
        shost->host_failed++;
        scsi_eh_wakeup(shost);
+ out_unlock:
        spin_unlock_irqrestore(shost->host_lock, flags);
-       return 1;
+       return ret;
 }
 
 /**
@@ -176,8 +181,8 @@ void scsi_times_out(struct scsi_cmnd *scmd)
                }
 
        if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
-               panic("Error handler thread not present at %p %p %s %d",
-                     scmd, scmd->device->host, __FILE__, __LINE__);
+               scmd->result |= DID_TIME_OUT << 16;
+               __scsi_done(scmd);
        }
 }
 
@@ -196,8 +201,7 @@ int scsi_block_when_processing_errors(struct scsi_device *sdev)
 {
        int online;
 
-       wait_event(sdev->host->host_wait, (sdev->host->shost_state !=
-                                          SHOST_RECOVERY));
+       wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
 
        online = scsi_device_online(sdev);
 
@@ -1441,6 +1445,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 static void scsi_restart_operations(struct Scsi_Host *shost)
 {
        struct scsi_device *sdev;
+       unsigned long flags;
 
        /*
         * If the door was locked, we need to insert a door lock request
@@ -1460,7 +1465,11 @@ static void scsi_restart_operations(struct Scsi_Host *shost)
        SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
                                          __FUNCTION__));
 
-       scsi_host_set_state(shost, SHOST_RUNNING);
+       spin_lock_irqsave(shost->host_lock, flags);
+       if (scsi_host_set_state(shost, SHOST_RUNNING))
+               if (scsi_host_set_state(shost, SHOST_CANCEL))
+                       BUG_ON(scsi_host_set_state(shost, SHOST_DEL));
+       spin_unlock_irqrestore(shost->host_lock, flags);
 
        wake_up(&shost->host_wait);
 
index b7fddac..de7f98c 100644 (file)
@@ -458,7 +458,7 @@ int scsi_nonblockable_ioctl(struct scsi_device *sdev, int cmd,
         * error processing, as long as the device was opened
         * non-blocking */
        if (filp && filp->f_flags & O_NONBLOCK) {
-               if (sdev->host->shost_state == SHOST_RECOVERY)
+               if (scsi_host_in_recovery(sdev->host))
                        return -ENODEV;
        } else if (!scsi_block_when_processing_errors(sdev))
                return -ENODEV;
index f065cbc..dc9c772 100644 (file)
@@ -447,7 +447,7 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 
        spin_lock_irqsave(shost->host_lock, flags);
        shost->host_busy--;
-       if (unlikely((shost->shost_state == SHOST_RECOVERY) &&
+       if (unlikely(scsi_host_in_recovery(shost) &&
                     shost->host_failed))
                scsi_eh_wakeup(shost);
        spin_unlock(shost->host_lock);
@@ -1339,7 +1339,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
                                   struct Scsi_Host *shost,
                                   struct scsi_device *sdev)
 {
-       if (shost->shost_state == SHOST_RECOVERY)
+       if (scsi_host_in_recovery(shost))
                return 0;
        if (shost->host_busy == 0 && shost->host_blocked) {
                /*
index 1e47b7e..72a6550 100644 (file)
@@ -57,6 +57,8 @@ static struct {
        { SHOST_CANCEL, "cancel" },
        { SHOST_DEL, "deleted" },
        { SHOST_RECOVERY, "recovery" },
+       { SHOST_CANCEL_RECOVERY, "cancel/recovery" },
+       { SHOST_DEL_RECOVERY, "deleted/recovery", },
 };
 const char *scsi_host_state_name(enum scsi_host_state state)
 {
index 9ea4765..4d09a6e 100644 (file)
@@ -1027,7 +1027,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
                if (sdp->detached)
                        return -ENODEV;
                if (filp->f_flags & O_NONBLOCK) {
-                       if (sdp->device->host->shost_state == SHOST_RECOVERY)
+                       if (scsi_host_in_recovery(sdp->device->host))
                                return -EBUSY;
                } else if (!scsi_block_when_processing_errors(sdp->device))
                        return -EBUSY;
index 916144b..540369f 100644 (file)
@@ -439,6 +439,8 @@ enum scsi_host_state {
        SHOST_CANCEL,
        SHOST_DEL,
        SHOST_RECOVERY,
+       SHOST_CANCEL_RECOVERY,
+       SHOST_DEL_RECOVERY,
 };
 
 struct Scsi_Host {
@@ -621,6 +623,13 @@ static inline struct Scsi_Host *dev_to_shost(struct device *dev)
        return container_of(dev, struct Scsi_Host, shost_gendev);
 }
 
+static inline int scsi_host_in_recovery(struct Scsi_Host *shost)
+{
+       return shost->shost_state == SHOST_RECOVERY ||
+               shost->shost_state == SHOST_CANCEL_RECOVERY ||
+               shost->shost_state == SHOST_DEL_RECOVERY;
+}
+
 extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
 extern void scsi_flush_work(struct Scsi_Host *);