Don't let a blocked_rdev interfere with read request in raid5/6
NeilBrown [Tue, 5 Aug 2008 05:54:13 +0000 (15:54 +1000)]
When we have externally managed metadata, we need to mark a failed
device as 'Blocked' and not allow any writes until that device
have been marked as faulty in the metadata and the Blocked flag has
been removed.

However it is perfectly OK to allow read requests when there is a
Blocked device, and with a readonly array, there may not be any
metadata-handler watching for blocked devices.

So in raid5/raid6 only allow a Blocked device to interfere with
Write request or resync.  Read requests go through untouched.

raid1 and raid10 already differentiate between read and write
properly.

Signed-off-by: NeilBrown <neilb@suse.de>

drivers/md/raid5.c

index 17e0953..224de02 100644 (file)
@@ -2568,10 +2568,10 @@ static bool handle_stripe5(struct stripe_head *sh)
                if (dev->written)
                        s.written++;
                rdev = rcu_dereference(conf->disks[i].rdev);
-               if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+               if (blocked_rdev == NULL &&
+                   rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
                        blocked_rdev = rdev;
                        atomic_inc(&rdev->nr_pending);
-                       break;
                }
                if (!rdev || !test_bit(In_sync, &rdev->flags)) {
                        /* The ReadError flag will just be confusing now */
@@ -2588,8 +2588,14 @@ static bool handle_stripe5(struct stripe_head *sh)
        rcu_read_unlock();
 
        if (unlikely(blocked_rdev)) {
-               set_bit(STRIPE_HANDLE, &sh->state);
-               goto unlock;
+               if (s.syncing || s.expanding || s.expanded ||
+                   s.to_write || s.written) {
+                       set_bit(STRIPE_HANDLE, &sh->state);
+                       goto unlock;
+               }
+               /* There is nothing for the blocked_rdev to block */
+               rdev_dec_pending(blocked_rdev, conf->mddev);
+               blocked_rdev = NULL;
        }
 
        if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
@@ -2832,10 +2838,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                if (dev->written)
                        s.written++;
                rdev = rcu_dereference(conf->disks[i].rdev);
-               if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+               if (blocked_rdev == NULL &&
+                   rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
                        blocked_rdev = rdev;
                        atomic_inc(&rdev->nr_pending);
-                       break;
                }
                if (!rdev || !test_bit(In_sync, &rdev->flags)) {
                        /* The ReadError flag will just be confusing now */
@@ -2853,9 +2859,16 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        rcu_read_unlock();
 
        if (unlikely(blocked_rdev)) {
-               set_bit(STRIPE_HANDLE, &sh->state);
-               goto unlock;
+               if (s.syncing || s.expanding || s.expanded ||
+                   s.to_write || s.written) {
+                       set_bit(STRIPE_HANDLE, &sh->state);
+                       goto unlock;
+               }
+               /* There is nothing for the blocked_rdev to block */
+               rdev_dec_pending(blocked_rdev, conf->mddev);
+               blocked_rdev = NULL;
        }
+
        pr_debug("locked=%d uptodate=%d to_read=%d"
               " to_write=%d failed=%d failed_num=%d,%d\n",
               s.locked, s.uptodate, s.to_read, s.to_write, s.failed,