[PATCH] md: Improve locking around error handling
NeilBrown [Tue, 3 Oct 2006 08:15:53 +0000 (01:15 -0700)]
The error handling routines don't use proper locking, and so two concurrent
errors could trigger a problem.

So:
  - use test-and-set and test-and-clear to synchonise
    the In_sync bits with the ->degraded count
  - use the spinlock to protect updates to the
    degraded count (could use an atomic_t but that
    would be a bigger change in code, and isn't
    really justified)
  - remove un-necessary locking in raid5

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c

index 3fc9ec2..99c4e03 100644 (file)
@@ -959,14 +959,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
                 * normal single drive
                 */
                return;
-       if (test_bit(In_sync, &rdev->flags)) {
+       if (test_and_clear_bit(In_sync, &rdev->flags)) {
+               unsigned long flags;
+               spin_lock_irqsave(&conf->device_lock, flags);
                mddev->degraded++;
+               spin_unlock_irqrestore(&conf->device_lock, flags);
                /*
                 * if recovery is running, make sure it aborts.
                 */
                set_bit(MD_RECOVERY_ERR, &mddev->recovery);
        }
-       clear_bit(In_sync, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
        printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
@@ -1022,9 +1024,11 @@ static int raid1_spare_active(mddev_t *mddev)
                mdk_rdev_t *rdev = conf->mirrors[i].rdev;
                if (rdev
                    && !test_bit(Faulty, &rdev->flags)
-                   && !test_bit(In_sync, &rdev->flags)) {
+                   && !test_and_set_bit(In_sync, &rdev->flags)) {
+                       unsigned long flags;
+                       spin_lock_irqsave(&conf->device_lock, flags);
                        mddev->degraded--;
-                       set_bit(In_sync, &rdev->flags);
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
                }
        }
 
@@ -2048,7 +2052,7 @@ static int raid1_reshape(mddev_t *mddev)
        mirror_info_t *newmirrors;
        conf_t *conf = mddev_to_conf(mddev);
        int cnt, raid_disks;
-
+       unsigned long flags;
        int d, d2;
 
        /* Cannot change chunk_size, layout, or level */
@@ -2107,7 +2111,9 @@ static int raid1_reshape(mddev_t *mddev)
        kfree(conf->poolinfo);
        conf->poolinfo = newpoolinfo;
 
+       spin_lock_irqsave(&conf->device_lock, flags);
        mddev->degraded += (raid_disks - conf->raid_disks);
+       spin_unlock_irqrestore(&conf->device_lock, flags);
        conf->raid_disks = mddev->raid_disks = raid_disks;
        mddev->delta_disks = 0;
 
index 233a4fa..64f8016 100644 (file)
@@ -950,14 +950,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
                 * really dead" tests...
                 */
                return;
-       if (test_bit(In_sync, &rdev->flags)) {
+       if (test_and_clear_bit(In_sync, &rdev->flags)) {
+               unsigned long flags;
+               spin_lock_irqsave(&conf->device_lock, flags);
                mddev->degraded++;
+               spin_unlock_irqrestore(&conf->device_lock, flags);
                /*
                 * if recovery is running, make sure it aborts.
                 */
                set_bit(MD_RECOVERY_ERR, &mddev->recovery);
        }
-       clear_bit(In_sync, &rdev->flags);
        set_bit(Faulty, &rdev->flags);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
        printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
@@ -1033,9 +1035,11 @@ static int raid10_spare_active(mddev_t *mddev)
                tmp = conf->mirrors + i;
                if (tmp->rdev
                    && !test_bit(Faulty, &tmp->rdev->flags)
-                   && !test_bit(In_sync, &tmp->rdev->flags)) {
+                   && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+                       unsigned long flags;
+                       spin_lock_irqsave(&conf->device_lock, flags);
                        mddev->degraded--;
-                       set_bit(In_sync, &tmp->rdev->flags);
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
                }
        }
 
index 46fd651..6cea9c9 100644 (file)
@@ -636,7 +636,6 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
        struct stripe_head *sh = bi->bi_private;
        raid5_conf_t *conf = sh->raid_conf;
        int disks = sh->disks, i;
-       unsigned long flags;
        int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
 
        if (bi->bi_size)
@@ -654,7 +653,6 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
                return 0;
        }
 
-       spin_lock_irqsave(&conf->device_lock, flags);
        if (!uptodate)
                md_error(conf->mddev, conf->disks[i].rdev);
 
@@ -662,8 +660,7 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
        
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
-       __release_stripe(conf, sh);
-       spin_unlock_irqrestore(&conf->device_lock, flags);
+       release_stripe(sh);
        return 0;
 }
 
@@ -697,9 +694,11 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 
        if (!test_bit(Faulty, &rdev->flags)) {
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
-               if (test_bit(In_sync, &rdev->flags)) {
+               if (test_and_clear_bit(In_sync, &rdev->flags)) {
+                       unsigned long flags;
+                       spin_lock_irqsave(&conf->device_lock, flags);
                        mddev->degraded++;
-                       clear_bit(In_sync, &rdev->flags);
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
                        /*
                         * if recovery was running, make sure it aborts.
                         */
@@ -3418,9 +3417,11 @@ static int raid5_spare_active(mddev_t *mddev)
                tmp = conf->disks + i;
                if (tmp->rdev
                    && !test_bit(Faulty, &tmp->rdev->flags)
-                   && !test_bit(In_sync, &tmp->rdev->flags)) {
+                   && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+                       unsigned long flags;
+                       spin_lock_irqsave(&conf->device_lock, flags);
                        mddev->degraded--;
-                       set_bit(In_sync, &tmp->rdev->flags);
+                       spin_unlock_irqrestore(&conf->device_lock, flags);
                }
        }
        print_raid5_conf(conf);
@@ -3556,6 +3557,7 @@ static int raid5_start_reshape(mddev_t *mddev)
        struct list_head *rtmp;
        int spares = 0;
        int added_devices = 0;
+       unsigned long flags;
 
        if (mddev->degraded ||
            test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
@@ -3597,7 +3599,9 @@ static int raid5_start_reshape(mddev_t *mddev)
                                break;
                }
 
+       spin_lock_irqsave(&conf->device_lock, flags);
        mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
+       spin_unlock_irqrestore(&conf->device_lock, flags);
        mddev->raid_disks = conf->raid_disks;
        mddev->reshape_position = 0;
        set_bit(MD_CHANGE_DEVS, &mddev->flags);