dm raid: handle failed devices during start up
Jonathan E Brassow [Wed, 28 Mar 2012 17:41:26 +0000 (18:41 +0100)]
The dm-raid code currently fails to create a RAID array if any of the
superblocks cannot be read.  This was an oversight as there is already
code to handle this case if the values ('- -') were provided for the
failed array position.

With this patch, if a superblock cannot be read, the array position's
fields are initialized as though '- -' was set in the table.  That is,
the device is failed and the position should not be used, but if there
is sufficient redundancy, the array should still be activated.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

drivers/md/dm-raid.c

index c5a875d..b0ba524 100644 (file)
@@ -604,7 +604,9 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
                return 0;
 
        if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
-               DMERR("Failed to read device superblock");
+               DMERR("Failed to read superblock of device at position %d",
+                     rdev->raid_disk);
+               set_bit(Faulty, &rdev->flags);
                return -EINVAL;
        }
 
@@ -855,9 +857,25 @@ static int super_validate(struct mddev *mddev, struct md_rdev *rdev)
 static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 {
        int ret;
+       unsigned redundancy = 0;
+       struct raid_dev *dev;
        struct md_rdev *rdev, *freshest;
        struct mddev *mddev = &rs->md;
 
+       switch (rs->raid_type->level) {
+       case 1:
+               redundancy = rs->md.raid_disks - 1;
+               break;
+       case 4:
+       case 5:
+       case 6:
+               redundancy = rs->raid_type->parity_devs;
+               break;
+       default:
+               ti->error = "Unknown RAID type";
+               return -EINVAL;
+       }
+
        freshest = NULL;
        rdev_for_each(rdev, mddev) {
                if (!rdev->meta_bdev)
@@ -872,6 +890,37 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
                case 0:
                        break;
                default:
+                       dev = container_of(rdev, struct raid_dev, rdev);
+                       if (redundancy--) {
+                               if (dev->meta_dev)
+                                       dm_put_device(ti, dev->meta_dev);
+
+                               dev->meta_dev = NULL;
+                               rdev->meta_bdev = NULL;
+
+                               if (rdev->sb_page)
+                                       put_page(rdev->sb_page);
+
+                               rdev->sb_page = NULL;
+
+                               rdev->sb_loaded = 0;
+
+                               /*
+                                * We might be able to salvage the data device
+                                * even though the meta device has failed.  For
+                                * now, we behave as though '- -' had been
+                                * set for this device in the table.
+                                */
+                               if (dev->data_dev)
+                                       dm_put_device(ti, dev->data_dev);
+
+                               dev->data_dev = NULL;
+                               rdev->bdev = NULL;
+
+                               list_del(&rdev->same_set);
+
+                               continue;
+                       }
                        ti->error = "Failed to load superblock";
                        return ret;
                }
@@ -1214,7 +1263,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
        .name = "raid",
-       .version = {1, 1, 0},
+       .version = {1, 2, 0},
        .module = THIS_MODULE,
        .ctr = raid_ctr,
        .dtr = raid_dtr,