md/raid456: When read error cannot be recovered, record bad block
majianpeng [Tue, 3 Jul 2012 05:57:02 +0000 (15:57 +1000)]
We may not be able to fix a bad block if:
 - the array is degraded
 - the over-write fails.

In these cases we currently eject the device, but we should
record a bad block if possible.

Signed-off-by: majianpeng <majianpeng@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>

drivers/md/raid5.c

index a5135e5..51169ec 100644 (file)
@@ -1743,6 +1743,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
        } else {
                const char *bdn = bdevname(rdev->bdev, b);
                int retry = 0;
+               int set_bad = 0;
 
                clear_bit(R5_UPTODATE, &sh->dev[i].flags);
                atomic_inc(&rdev->read_errors);
@@ -1754,7 +1755,8 @@ static void raid5_end_read_request(struct bio * bi, int error)
                                mdname(conf->mddev),
                                (unsigned long long)s,
                                bdn);
-               else if (conf->mddev->degraded >= conf->max_degraded)
+               else if (conf->mddev->degraded >= conf->max_degraded) {
+                       set_bad = 1;
                        printk_ratelimited(
                                KERN_WARNING
                                "md/raid:%s: read error not correctable "
@@ -1762,8 +1764,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
                                mdname(conf->mddev),
                                (unsigned long long)s,
                                bdn);
-               else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
+               } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
                        /* Oh, no!!! */
+                       set_bad = 1;
                        printk_ratelimited(
                                KERN_WARNING
                                "md/raid:%s: read error NOT corrected!! "
@@ -1771,7 +1774,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
                                mdname(conf->mddev),
                                (unsigned long long)s,
                                bdn);
-               else if (atomic_read(&rdev->read_errors)
+               } else if (atomic_read(&rdev->read_errors)
                         > conf->max_nr_stripes)
                        printk(KERN_WARNING
                               "md/raid:%s: Too many read errors, failing device %s.\n",
@@ -1783,7 +1786,11 @@ static void raid5_end_read_request(struct bio * bi, int error)
                else {
                        clear_bit(R5_ReadError, &sh->dev[i].flags);
                        clear_bit(R5_ReWrite, &sh->dev[i].flags);
-                       md_error(conf->mddev, rdev);
+                       if (!(set_bad
+                             && test_bit(In_sync, &rdev->flags)
+                             && rdev_set_badblocks(
+                                     rdev, sh->sector, STRIPE_SECTORS, 0)))
+                               md_error(conf->mddev, rdev);
                }
        }
        rdev_dec_pending(rdev, conf->mddev);