md: add ->takeover method to support changing the personality managing an array
NeilBrown [Tue, 31 Mar 2009 03:39:39 +0000 (14:39 +1100)]
Implement this for RAID6 to be able to 'takeover' a RAID5 array.  The
new RAID6 will use a layout which places Q on the last device, and
that device will be missing.
If there are any available spares, one will immediately have Q
recovered onto it.

Signed-off-by: NeilBrown <neilb@suse.de>

drivers/md/md.c
drivers/md/md.h
drivers/md/raid5.c

index 6cb31f8..05b613b 100644 (file)
@@ -2649,18 +2649,101 @@ level_show(mddev_t *mddev, char *page)
 static ssize_t
 level_store(mddev_t *mddev, const char *buf, size_t len)
 {
+       char level[16];
        ssize_t rv = len;
-       if (mddev->pers)
+       struct mdk_personality *pers;
+       void *priv;
+
+       if (mddev->pers == NULL) { /* no personality attached: only record the requested level name */
+               if (len == 0)
+                       return 0;
+               if (len >= sizeof(mddev->clevel))
+                       return -ENOSPC;
+               strncpy(mddev->clevel, buf, len); /* len < sizeof(clevel), so the terminator below fits */
+               if (mddev->clevel[len-1] == '\n')
+                       len--;
+               mddev->clevel[len] = 0;
+               mddev->level = LEVEL_NONE;
+               return rv; /* rv still holds the caller's original len */
+       }
+
+       /* request to change the personality.  The checks below require:
+        *  - no sync_thread, and reshape_position == MaxSector
+        *  - old personality provides ->quiesce
+        *  - new personality provides ->takeover and it accepts the array
+        */
+
+       if (mddev->sync_thread || mddev->reshape_position != MaxSector)
                return -EBUSY;
-       if (len == 0)
-               return 0;
-       if (len >= sizeof(mddev->clevel))
-               return -ENOSPC;
-       strncpy(mddev->clevel, buf, len);
-       if (mddev->clevel[len-1] == '\n')
+
+       if (!mddev->pers->quiesce) {
+               printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
+                      mdname(mddev), mddev->pers->name);
+               return -EINVAL;
+       }
+
+       /* Copy the requested name (minus one trailing newline), then look it up */
+       if (len == 0 || len >= sizeof(level))
+               return -EINVAL;
+       strncpy(level, buf, len);
+       if (level[len-1] == '\n')
                len--;
-       mddev->clevel[len] = 0;
-       mddev->level = LEVEL_NONE;
+       level[len] = 0;
+
+       request_module("md-%s", level); /* result is not checked; find_pers() below decides */
+       spin_lock(&pers_lock);
+       pers = find_pers(LEVEL_NONE, level);
+       if (!pers || !try_module_get(pers->owner)) { /* not found, or its module could not be referenced */
+               spin_unlock(&pers_lock);
+               printk(KERN_WARNING "md: personality %s not loaded\n", level);
+               return -EINVAL;
+       }
+       spin_unlock(&pers_lock);
+
+       if (pers == mddev->pers) {
+               /* Already the active personality: drop the reference just taken */
+               module_put(pers->owner);
+               return rv;
+       }
+       if (!pers->takeover) {
+               module_put(pers->owner);
+               printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
+                      mdname(mddev), level);
+               return -EINVAL;
+       }
+
+       /* ->takeover must set new_* and/or delta_disks if it succeeds, and
+        * may set them when it fails, so the error path below resets them.
+        */
+       priv = pers->takeover(mddev);
+       if (IS_ERR(priv)) {
+               mddev->new_level = mddev->level;
+               mddev->new_layout = mddev->layout;
+               mddev->new_chunk = mddev->chunk_size;
+               mddev->raid_disks -= mddev->delta_disks; /* undo any raid_disks change made by ->takeover */
+               mddev->delta_disks = 0;
+               module_put(pers->owner);
+               printk(KERN_WARNING "md: %s: %s would not accept array\n",
+                      mdname(mddev), level);
+               return PTR_ERR(priv);
+       }
+
+       /* Takeover accepted: swap personalities between suspend and resume */
+       mddev_suspend(mddev);
+       mddev->pers->stop(mddev);
+       module_put(mddev->pers->owner); /* release the old personality's module reference */
+       mddev->pers = pers;
+       mddev->private = priv;
+       strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+       mddev->level = mddev->new_level;
+       mddev->layout = mddev->new_layout;
+       mddev->chunk_size = mddev->new_chunk;
+       mddev->delta_disks = 0;
+       pers->run(mddev); /* NOTE(review): the return value of ->run is ignored here */
+       mddev_resume(mddev);
+       set_bit(MD_CHANGE_DEVS, &mddev->flags); /* NOTE(review): presumably requests a metadata update - confirm */
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); /* NOTE(review): presumably lets the md thread start recovery - confirm */
+       md_wakeup_thread(mddev->thread);
        return rv;
 }
 
index 84b22d6..8034f62 100644 (file)
@@ -321,6 +321,16 @@ struct mdk_personality
         * others - reserved
         */
        void (*quiesce) (mddev_t *mddev, int state);
+       /* takeover is used to transition an array from one
+        * personality to another.  The new personality must be able
+        * to handle the data in the current layout.
+        * e.g. 2drive raid1 -> 2drive raid5
+        *      ndrive raid5 -> degraded n+1drive raid6 with special layout
+        * If the takeover succeeds, a new 'private' structure is returned.
+        * This needs to be installed and then ->run used to activate the
+        * array.
+        */
+       void *(*takeover) (mddev_t *mddev);
 };
 
 
index 81789fa..5b346b4 100644 (file)
@@ -933,8 +933,10 @@ static int grow_stripes(raid5_conf_t *conf, int num)
        struct kmem_cache *sc;
        int devs = conf->raid_disks;
 
-       sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
-       sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
+       sprintf(conf->cache_name[0],
+               "raid%d-%s", conf->level, mdname(conf->mddev));
+       sprintf(conf->cache_name[1],
+               "raid%d-%s-alt", conf->level, mdname(conf->mddev));
        conf->active_name = 0;
        sc = kmem_cache_create(conf->cache_name[conf->active_name],
                               sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -4361,10 +4363,12 @@ static int run(mddev_t *mddev)
                BUG_ON(mddev->chunk_size != mddev->new_chunk);
                BUG_ON(mddev->delta_disks != 0);
        }
-       conf = setup_conf(mddev);
 
-       if (conf == NULL)
-               return -EIO;
+       if (mddev->private == NULL)
+               conf = setup_conf(mddev);
+       else
+               conf = mddev->private;
+
        if (IS_ERR(conf))
                return PTR_ERR(conf);
 
@@ -4880,6 +4884,55 @@ static void raid5_quiesce(mddev_t *mddev, int state)
        }
 }
 
+static struct mdk_personality raid5_personality;
+
+static void *raid6_takeover(mddev_t *mddev)
+{
+       /* Only a raid5 array can be taken over.  Each raid5 layout is
+        * mapped to a raid6 layout with the Q block at the end; on success
+        * new_level/new_layout/delta_disks are set and raid_disks grows by 1.
+        */
+       int new_layout;
+
+       if (mddev->pers != &raid5_personality) /* array is not currently managed by raid5 */
+               return ERR_PTR(-EINVAL);
+       if (mddev->degraded > 1) /* NOTE(review): degraded presumably counts missing devices - confirm */
+               return ERR_PTR(-EINVAL);
+       if (mddev->raid_disks > 253) /* NOTE(review): limit presumably leaves room for the added Q device - confirm */
+               return ERR_PTR(-EINVAL);
+       if (mddev->raid_disks < 3)
+               return ERR_PTR(-EINVAL);
+
+       switch (mddev->layout) { /* translate the current layout to its raid6 counterpart */
+       case ALGORITHM_LEFT_ASYMMETRIC:
+               new_layout = ALGORITHM_LEFT_ASYMMETRIC_6;
+               break;
+       case ALGORITHM_RIGHT_ASYMMETRIC:
+               new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6;
+               break;
+       case ALGORITHM_LEFT_SYMMETRIC:
+               new_layout = ALGORITHM_LEFT_SYMMETRIC_6;
+               break;
+       case ALGORITHM_RIGHT_SYMMETRIC:
+               new_layout = ALGORITHM_RIGHT_SYMMETRIC_6;
+               break;
+       case ALGORITHM_PARITY_0:
+               new_layout = ALGORITHM_PARITY_0_6;
+               break;
+       case ALGORITHM_PARITY_N:
+               new_layout = ALGORITHM_PARITY_N; /* kept unchanged: no _6 variant is used for this layout */
+               break;
+       default: /* any other layout is rejected */
+               return ERR_PTR(-EINVAL);
+       }
+       mddev->new_level = 6;
+       mddev->new_layout = new_layout;
+       mddev->delta_disks = 1;
+       mddev->raid_disks += 1; /* level_store subtracts delta_disks again if an ERR_PTR is returned */
+       return setup_conf(mddev); /* result is handed back as-is; level_store tests it with IS_ERR() */
+}
+
+
 static struct mdk_personality raid6_personality =
 {
        .name           = "raid6",
@@ -4900,6 +4953,7 @@ static struct mdk_personality raid6_personality =
        .start_reshape  = raid5_start_reshape,
 #endif
        .quiesce        = raid5_quiesce,
+       .takeover       = raid6_takeover,
 };
 static struct mdk_personality raid5_personality =
 {