 * Any code which breaks out of this loop while it owns
 * a reference to the current mddev must mddev_put it.
*/
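/*
 * Illustrative usage sketch (not part of the patch; mddev_is_interesting()
 * and do_something() are hypothetical helpers): the iterator takes a
 * reference on each mddev as it visits it and drops it on the next step,
 * so only an early exit needs an explicit mddev_put():
 *
 *	mddev_t *mddev;
 *	struct list_head *tmp;
 *
 *	for_each_mddev(mddev, tmp) {
 *		if (mddev_is_interesting(mddev)) {
 *			do_something(mddev);
 *			mddev_put(mddev);	// pay for the early break
 *			break;
 *		}
 *	}
 */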
-#define ITERATE_MDDEV(mddev,tmp) \
+#define for_each_mddev(mddev,tmp) \
\
for (({ spin_lock(&all_mddevs_lock); \
tmp = all_mddevs.next; \
spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
new->reshape_position = MaxSector;
+ new->resync_max = MaxSector;
+ new->level = LEVEL_NONE;
new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
kfree(new);
return NULL;
}
- set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
+ /* Can be unlocked because the queue is new: no concurrency */
+ queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
blk_queue_make_request(new->queue, md_fail_request);
mdk_rdev_t * rdev;
struct list_head *tmp;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (rdev->desc_nr == nr)
return rdev;
}
struct list_head *tmp;
mdk_rdev_t *rdev;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (rdev->bdev->bd_dev == dev)
return rdev;
}
else
rdev->desc_nr = sb->this_disk.number;
- if (refdev == 0)
+ if (!refdev) {
ret = 1;
- else {
+ } else {
__u64 ev1, ev2;
mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
if (!uuid_equal(refsb, sb)) {
__u64 ev1 = md_event(sb);
rdev->raid_disk = -1;
- rdev->flags = 0;
+ clear_bit(Faulty, &rdev->flags);
+ clear_bit(In_sync, &rdev->flags);
+ clear_bit(WriteMostly, &rdev->flags);
+ clear_bit(BarriersNotsupp, &rdev->flags);
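+	/* Clearing individual bits here (instead of "rdev->flags = 0")
+	 * preserves flag state that is not re-derived from the superblock,
+	 * e.g. Blocked and AllReserved.
+	 */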
+
if (mddev->raid_disks == 0) {
mddev->major_version = 0;
mddev->minor_version = sb->minor_version;
mddev->patch_version = sb->patch_version;
- mddev->persistent = 1;
mddev->external = 0;
mddev->chunk_size = sb->chunk_size;
mddev->ctime = sb->ctime;
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
sb->disks[0].state = (1<<MD_DISK_REMOVED);
- ITERATE_RDEV(mddev,rdev2,tmp) {
+ rdev_for_each(rdev2, tmp, mddev) {
mdp_disk_t *d;
int desc_nr;
if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
if (rdev->sb_size & bmask)
- rdev-> sb_size = (rdev->sb_size | bmask)+1;
+ rdev->sb_size = (rdev->sb_size | bmask) + 1;
+
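+	/* minor_version 1 and 2 place the superblock at or near the start
+	 * of the device, so the data area must begin beyond it.
+	 */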
+ if (minor_version
+ && rdev->data_offset < sb_offset + (rdev->sb_size/512))
+ return -EINVAL;
if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
rdev->desc_nr = -1;
else
rdev->desc_nr = le32_to_cpu(sb->dev_number);
- if (refdev == 0)
+ if (!refdev) {
ret = 1;
- else {
+ } else {
__u64 ev1, ev2;
struct mdp_superblock_1 *refsb =
(struct mdp_superblock_1*)page_address(refdev->sb_page);
else
ret = 0;
}
- if (minor_version)
+ if (minor_version)
rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
else
rdev->size = rdev->sb_offset;
__u64 ev1 = le64_to_cpu(sb->events);
rdev->raid_disk = -1;
- rdev->flags = 0;
+ clear_bit(Faulty, &rdev->flags);
+ clear_bit(In_sync, &rdev->flags);
+ clear_bit(WriteMostly, &rdev->flags);
+ clear_bit(BarriersNotsupp, &rdev->flags);
+
if (mddev->raid_disks == 0) {
mddev->major_version = 1;
mddev->patch_version = 0;
- mddev->persistent = 1;
mddev->external = 0;
mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
}
max_dev = 0;
- ITERATE_RDEV(mddev,rdev2,tmp)
+ rdev_for_each(rdev2, tmp, mddev)
if (rdev2->desc_nr+1 > max_dev)
max_dev = rdev2->desc_nr+1;
for (i=0; i<max_dev;i++)
sb->dev_roles[i] = cpu_to_le16(0xfffe);
- ITERATE_RDEV(mddev,rdev2,tmp) {
+ rdev_for_each(rdev2, tmp, mddev) {
i = rdev2->desc_nr;
if (test_bit(Faulty, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(0xfffe);
struct list_head *tmp, *tmp2;
mdk_rdev_t *rdev, *rdev2;
- ITERATE_RDEV(mddev1,rdev,tmp)
- ITERATE_RDEV(mddev2, rdev2, tmp2)
+ rdev_for_each(rdev, tmp, mddev1)
+ rdev_for_each(rdev2, tmp2, mddev2)
if (rdev->bdev->bd_contains ==
rdev2->bdev->bd_contains)
return 1;
MD_BUG();
return -EINVAL;
}
+
+ /* prevent duplicates */
+ if (find_rdev(mddev, rdev->bdev->bd_dev))
+ return -EEXIST;
+
/* make sure rdev->size exceeds mddev->size */
if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
if (mddev->pers) {
goto fail;
}
list_add(&rdev->same_set, &mddev->disks);
- bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
+ bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
return 0;
fail:
return err;
}
-static void delayed_delete(struct work_struct *ws)
+static void md_delayed_delete(struct work_struct *ws)
{
mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
kobject_del(&rdev->kobj);
+ kobject_put(&rdev->kobj);
}
static void unbind_rdev_from_array(mdk_rdev_t * rdev)
/* We need to delay this, otherwise we can deadlock when
 * writing 'remove' to "dev/state".
*/
- INIT_WORK(&rdev->del_work, delayed_delete);
+ INIT_WORK(&rdev->del_work, md_delayed_delete);
+ kobject_get(&rdev->kobj);
schedule_work(&rdev->del_work);
}
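/*
 * Lifetime sketch: the kobject_get() above pins the rdev's kobject until
 * the scheduled work runs; md_delayed_delete() then does kobject_del()
 * followed by kobject_put() to drop that pinning reference, so the
 * deferred sysfs removal cannot race with the rdev being freed.
 */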
* otherwise reused by a RAID array (or any other kernel
* subsystem), by bd_claiming the device.
*/
-static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
+static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
{
int err = 0;
struct block_device *bdev;
__bdevname(dev, b));
return PTR_ERR(bdev);
}
- err = bd_claim(bdev, rdev);
+ err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
if (err) {
printk(KERN_ERR "md: could not bd_claim %s.\n",
bdevname(bdev, b));
blkdev_put(bdev);
return err;
}
+ if (!shared)
+ set_bit(AllReserved, &rdev->flags);
rdev->bdev = bdev;
return err;
}
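/*
 * Note on the bd_claim() cookie: a claim succeeds only if the device is
 * unclaimed or every claimer passes the same holder pointer.  Shared
 * claims use the address of lock_rdev itself as that common holder, so
 * several arrays can claim partitions of one device; an exclusive claim
 * keeps the rdev as a unique holder and sets AllReserved so the overlap
 * checks treat the whole device as taken.
 */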
free_disk_sb(rdev);
list_del_init(&rdev->same_set);
#ifndef MODULE
- md_autodetect_dev(rdev->bdev->bd_dev);
+ if (test_bit(AutoDetected, &rdev->flags))
+ md_autodetect_dev(rdev->bdev->bd_dev);
#endif
unlock_rdev(rdev);
kobject_put(&rdev->kobj);
struct list_head *tmp;
mdk_rdev_t *rdev;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (!rdev->mddev) {
MD_BUG();
continue;
printk("md: **********************************\n");
printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
printk("md: **********************************\n");
- ITERATE_MDDEV(mddev,tmp) {
+ for_each_mddev(mddev, tmp) {
if (mddev->bitmap)
bitmap_print_sb(mddev->bitmap);
else
printk("%s: ", mdname(mddev));
- ITERATE_RDEV(mddev,rdev,tmp2)
+ rdev_for_each(rdev, tmp2, mddev)
printk("<%s>", bdevname(rdev->bdev,b));
printk("\n");
- ITERATE_RDEV(mddev,rdev,tmp2)
+ rdev_for_each(rdev, tmp2, mddev)
print_rdev(rdev);
}
printk("md: **********************************\n");
mdk_rdev_t *rdev;
struct list_head *tmp;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (rdev->sb_events == mddev->events ||
(nospares &&
rdev->raid_disk < 0 &&
int sync_req;
int nospares = 0;
+ if (mddev->external)
+ return;
repeat:
spin_lock_irq(&mddev->write_lock);
mdname(mddev),mddev->in_sync);
bitmap_update_sb(mddev->bitmap);
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
char b[BDEVNAME_SIZE];
dprintk(KERN_INFO "md: ");
if (rdev->sb_loaded != 1)
state_show(mdk_rdev_t *rdev, char *page)
{
char *sep = "";
- int len=0;
+ size_t len = 0;
if (test_bit(Faulty, &rdev->flags)) {
len+= sprintf(page+len, "%sfaulty",sep);
len += sprintf(page+len, "%swrite_mostly",sep);
sep = ",";
}
+ if (test_bit(Blocked, &rdev->flags)) {
+ len += sprintf(page+len, "%sblocked", sep);
+ sep = ",";
+ }
if (!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags)) {
len += sprintf(page+len, "%sspare", sep);
* remove - disconnects the device
* writemostly - sets write_mostly
* -writemostly - clears write_mostly
+ * blocked - sets the Blocked flag
+ * -blocked - clears the Blocked flag
*/
int err = -EINVAL;
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
err = 0;
} else if (cmd_match(buf, "-writemostly")) {
clear_bit(WriteMostly, &rdev->flags);
+ err = 0;
+ } else if (cmd_match(buf, "blocked")) {
+ set_bit(Blocked, &rdev->flags);
+ err = 0;
+ } else if (cmd_match(buf, "-blocked")) {
+ clear_bit(Blocked, &rdev->flags);
+ wake_up(&rdev->blocked_wait);
+ set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ md_wakeup_thread(rdev->mddev->thread);
+
err = 0;
}
return err ? err : len;
static struct rdev_sysfs_entry rdev_state =
__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
-static ssize_t
-super_show(mdk_rdev_t *rdev, char *page)
-{
- if (rdev->sb_loaded && rdev->sb_size) {
- memcpy(page, page_address(rdev->sb_page), rdev->sb_size);
- return rdev->sb_size;
- } else
- return 0;
-}
-static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super);
-
static ssize_t
errors_show(mdk_rdev_t *rdev, char *page)
{
slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
char *e;
+ int err;
+ char nm[20];
int slot = simple_strtoul(buf, &e, 10);
if (strncmp(buf, "none", 4)==0)
slot = -1;
else if (e==buf || (*e && *e!= '\n'))
return -EINVAL;
- if (rdev->mddev->pers)
- /* Cannot set slot in active array (yet) */
- return -EBUSY;
- if (slot >= rdev->mddev->raid_disks)
- return -ENOSPC;
- rdev->raid_disk = slot;
- /* assume it is working */
- rdev->flags = 0;
- set_bit(In_sync, &rdev->flags);
+ if (rdev->mddev->pers) {
+		/* Setting 'slot' on an active array also requires
+		 * updating the 'rd%d' link, and communicating
+		 * with the personality via ->hot_*_disk.
+ * For now we only support removing
+ * failed/spare devices. This normally happens automatically,
+ * but not when the metadata is externally managed.
+ */
+ if (slot != -1)
+ return -EBUSY;
+ if (rdev->raid_disk == -1)
+ return -EEXIST;
+ /* personality does all needed checks */
+ if (rdev->mddev->pers->hot_add_disk == NULL)
+ return -EINVAL;
+ err = rdev->mddev->pers->
+ hot_remove_disk(rdev->mddev, rdev->raid_disk);
+ if (err)
+ return err;
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ sysfs_remove_link(&rdev->mddev->kobj, nm);
+ set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ md_wakeup_thread(rdev->mddev->thread);
+ } else {
+ if (slot >= rdev->mddev->raid_disks)
+ return -ENOSPC;
+ rdev->raid_disk = slot;
+ /* assume it is working */
+ clear_bit(Faulty, &rdev->flags);
+ clear_bit(WriteMostly, &rdev->flags);
+ set_bit(In_sync, &rdev->flags);
+ }
return len;
}
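/*
 * Illustrative sysfs usage (paths assume array md0 and member sda1):
 *
 *	# inactive array: just record the slot
 *	echo 2 > /sys/block/md0/md/dev-sda1/slot
 *
 *	# active, externally managed array: detach a failed/spare device
 *	echo none > /sys/block/md0/md/dev-sda1/slot
 */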
return -EINVAL;
if (rdev->mddev->pers)
return -EBUSY;
+ if (rdev->size && rdev->mddev->external)
+ /* Must set offset before size, so overlap checks
+ * can be sane */
+ return -EBUSY;
rdev->data_offset = offset;
return len;
}
return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
}
+static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
+{
+ /* check if two start/length pairs overlap */
+ if (s1+l1 <= s2)
+ return 0;
+ if (s2+l2 <= s1)
+ return 0;
+ return 1;
+}
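+/*
+ * Example: overlaps(0, 100, 50, 100) == 1 in either argument order,
+ * while merely touching ranges, e.g. overlaps(0, 100, 100, 50), give 0:
+ * each range [s, s+l) is half-open, so s1+l1 == s2 is no overlap.
+ */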
+
static ssize_t
rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
char *e;
unsigned long long size = simple_strtoull(buf, &e, 10);
+ unsigned long long oldsize = rdev->size;
+ mddev_t *my_mddev = rdev->mddev;
+
if (e==buf || (*e && *e != '\n'))
return -EINVAL;
- if (rdev->mddev->pers)
+ if (my_mddev->pers)
return -EBUSY;
rdev->size = size;
- if (size < rdev->mddev->size || rdev->mddev->size == 0)
- rdev->mddev->size = size;
+	if (size > oldsize && my_mddev->external) {
+ /* need to check that all other rdevs with the same ->bdev
+ * do not overlap. We need to unlock the mddev to avoid
+ * a deadlock. We have already changed rdev->size, and if
+ * we have to change it back, we will have the lock again.
+ */
+ mddev_t *mddev;
+ int overlap = 0;
+ struct list_head *tmp, *tmp2;
+
+ mddev_unlock(my_mddev);
+ for_each_mddev(mddev, tmp) {
+ mdk_rdev_t *rdev2;
+
+ mddev_lock(mddev);
+ rdev_for_each(rdev2, tmp2, mddev)
+ if (test_bit(AllReserved, &rdev2->flags) ||
+ (rdev->bdev == rdev2->bdev &&
+ rdev != rdev2 &&
+ overlaps(rdev->data_offset, rdev->size,
+ rdev2->data_offset, rdev2->size))) {
+ overlap = 1;
+ break;
+ }
+ mddev_unlock(mddev);
+ if (overlap) {
+ mddev_put(mddev);
+ break;
+ }
+ }
+ mddev_lock(my_mddev);
+ if (overlap) {
+ /* Someone else could have slipped in a size
+ * change here, but doing so is just silly.
+ * We put oldsize back because we *know* it is
+ * safe, and trust userspace not to race with
+ * itself
+ */
+ rdev->size = oldsize;
+ return -EBUSY;
+ }
+ }
+ if (size < my_mddev->size || my_mddev->size == 0)
+ my_mddev->size = size;
return len;
}
static struct attribute *rdev_default_attrs[] = {
&rdev_state.attr,
- &rdev_super.attr,
&rdev_errors.attr,
&rdev_slot.attr,
&rdev_offset.attr,
{
struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+ mddev_t *mddev = rdev->mddev;
+ ssize_t rv;
if (!entry->show)
return -EIO;
- return entry->show(rdev, page);
+
+ rv = mddev ? mddev_lock(mddev) : -EBUSY;
+ if (!rv) {
+ if (rdev->mddev == NULL)
+ rv = -EBUSY;
+ else
+ rv = entry->show(rdev, page);
+ mddev_unlock(mddev);
+ }
+ return rv;
}
static ssize_t
{
struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+ ssize_t rv;
+ mddev_t *mddev = rdev->mddev;
if (!entry->store)
return -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- return entry->store(rdev, page, length);
+	rv = mddev ? mddev_lock(mddev) : -EBUSY;
+ if (!rv) {
+ if (rdev->mddev == NULL)
+ rv = -EBUSY;
+ else
+ rv = entry->store(rdev, page, length);
+ mddev_unlock(mddev);
+ }
+ return rv;
}
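/*
 * Rationale sketch: both entry points now take the owning mddev's lock
 * and re-check rdev->mddev under it, because the rdev can be detached
 * from its array between the container_of() lookup and the ->show() or
 * ->store() call; without the re-check a handler could follow a stale
 * mddev pointer.
 */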
static void rdev_free(struct kobject *ko)
if ((err = alloc_disk_sb(rdev)))
goto abort_free;
- err = lock_rdev(rdev, newdev);
+ err = lock_rdev(rdev, newdev, super_format == -2);
if (err)
goto abort_free;
goto abort_free;
}
}
+
INIT_LIST_HEAD(&rdev->same_set);
+ init_waitqueue_head(&rdev->blocked_wait);
return rdev;
char b[BDEVNAME_SIZE];
freshest = NULL;
- ITERATE_RDEV(mddev,rdev,tmp)
+ rdev_for_each(rdev, tmp, mddev)
switch (super_types[mddev->major_version].
load_super(rdev, freshest, mddev->minor_version)) {
case 1:
validate_super(mddev, freshest);
i = 0;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (rdev != freshest)
if (super_types[mddev->major_version].
validate_super(mddev, rdev)) {
static ssize_t
level_store(mddev_t *mddev, const char *buf, size_t len)
{
- int rv = len;
+ ssize_t rv = len;
if (mddev->pers)
return -EBUSY;
if (len == 0)
static ssize_t
resync_start_store(mddev_t *mddev, const char *buf, size_t len)
{
- /* can only set chunk_size if array is not yet active */
char *e;
unsigned long long n = simple_strtoull(buf, &e, 10);
err = do_md_stop(mddev, 1);
else {
mddev->ro = 1;
+ set_disk_ro(mddev->gendisk, 1);
err = do_md_run(mddev);
}
break;
case read_auto:
- /* stopping an active array */
if (mddev->pers) {
- err = do_md_stop(mddev, 1);
- if (err == 0)
- mddev->ro = 2; /* FIXME mark devices writable */
+ if (mddev->ro != 1)
+ err = do_md_stop(mddev, 1);
+ else
+ err = restart_array(mddev);
+ if (err == 0) {
+ mddev->ro = 2;
+ set_disk_ro(mddev->gendisk, 0);
+ }
} else {
mddev->ro = 2;
err = do_md_run(mddev);
if (atomic_read(&mddev->writes_pending) == 0) {
if (mddev->in_sync == 0) {
mddev->in_sync = 1;
+ if (mddev->safemode == 1)
+ mddev->safemode = 0;
if (mddev->persistent)
set_bit(MD_CHANGE_CLEAN,
&mddev->flags);
err = 0;
} else {
mddev->ro = 0;
+ set_disk_ro(mddev->gendisk, 0);
err = do_md_run(mddev);
}
break;
if (err < 0)
goto out;
}
- } else
+ } else if (mddev->external)
+ rdev = md_import_device(dev, -2, -1);
+ else
rdev = md_import_device(dev, -1, -1);
if (IS_ERR(rdev))
return len;
}
if (strncmp(buf, "external:", 9) == 0) {
- int namelen = len-9;
+ size_t namelen = len-9;
if (namelen >= sizeof(mddev->metadata_type))
namelen = sizeof(mddev->metadata_type)-1;
strncpy(mddev->metadata_type, buf+9, namelen);
static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
+static ssize_t
+max_sync_show(mddev_t *mddev, char *page)
+{
+ if (mddev->resync_max == MaxSector)
+ return sprintf(page, "max\n");
+ else
+ return sprintf(page, "%llu\n",
+ (unsigned long long)mddev->resync_max);
+}
+static ssize_t
+max_sync_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ if (strncmp(buf, "max", 3) == 0)
+ mddev->resync_max = MaxSector;
+ else {
+ char *ep;
+ unsigned long long max = simple_strtoull(buf, &ep, 10);
+ if (ep == buf || (*ep != 0 && *ep != '\n'))
+ return -EINVAL;
+ if (max < mddev->resync_max &&
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ return -EBUSY;
+
+ /* Must be a multiple of chunk_size */
+ if (mddev->chunk_size) {
+ if (max & (sector_t)((mddev->chunk_size>>9)-1))
+ return -EINVAL;
+ }
+ mddev->resync_max = max;
+ }
+ wake_up(&mddev->recovery_wait);
+ return len;
+}
+
+static struct md_sysfs_entry md_max_sync =
+__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
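+/*
+ * Illustrative shell usage (the sector count is hypothetical and must
+ * be a multiple of the chunk size):
+ *
+ *	echo 1000448 > /sys/block/md0/md/sync_max   # bound the resync
+ *	echo max > /sys/block/md0/md/sync_max       # remove the bound
+ *
+ * md_do_sync() parks on mddev->recovery_wait once it reaches resync_max
+ * and resumes when the limit is raised.
+ */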
+
static ssize_t
suspend_lo_show(mddev_t *mddev, char *page)
{
&md_sync_max.attr,
&md_sync_speed.attr,
&md_sync_completed.attr,
+ &md_max_sync.attr,
&md_suspend_lo.attr,
&md_suspend_hi.attr,
&md_bitmap.attr,
/*
* Analyze all RAID superblock(s)
*/
- if (!mddev->raid_disks)
+ if (!mddev->raid_disks) {
+ if (!mddev->persistent)
+ return -EINVAL;
analyze_sbs(mddev);
+ }
chunk_size = mddev->chunk_size;
}
/* devices must have minimum size of one chunk */
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (test_bit(Faulty, &rdev->flags))
continue;
if (rdev->size < chunk_size / 1024) {
* the only valid external interface is through the md
* device.
*/
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
if (test_bit(Faulty, &rdev->flags))
continue;
sync_blockdev(rdev->bdev);
mdk_rdev_t *rdev2;
struct list_head *tmp2;
int warned = 0;
- ITERATE_RDEV(mddev, rdev, tmp) {
- ITERATE_RDEV(mddev, rdev2, tmp2) {
+ rdev_for_each(rdev, tmp, mddev) {
+ rdev_for_each(rdev2, tmp2, mddev) {
if (rdev < rdev2 &&
rdev->bdev->bd_contains ==
rdev2->bdev->bd_contains) {
mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
mddev->in_sync = 1;
- ITERATE_RDEV(mddev,rdev,tmp)
+ rdev_for_each(rdev, tmp, mddev)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
if (mddev->degraded && !mddev->sync_thread) {
struct list_head *rtmp;
int spares = 0;
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags))
}
mddev->bitmap_offset = 0;
- ITERATE_RDEV(mddev,rdev,tmp)
+ rdev_for_each(rdev, tmp, mddev)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
}
- /* make sure all delayed_delete calls have finished */
+ /* make sure all md_delayed_delete calls have finished */
flush_scheduled_work();
export_array(mddev);
mddev->size = 0;
mddev->raid_disks = 0;
mddev->recovery_cp = 0;
+ mddev->resync_max = MaxSector;
mddev->reshape_position = MaxSector;
mddev->external = 0;
+ mddev->persistent = 0;
+ mddev->level = LEVEL_NONE;
+ mddev->clevel[0] = 0;
+ mddev->flags = 0;
+ mddev->ro = 0;
+ mddev->metadata_type[0] = 0;
+ mddev->chunk_size = 0;
+ mddev->ctime = mddev->utime = 0;
+ mddev->layout = 0;
+ mddev->max_disks = 0;
+ mddev->events = 0;
+ mddev->delta_disks = 0;
+ mddev->new_level = LEVEL_NONE;
+ mddev->new_layout = 0;
+ mddev->new_chunk = 0;
+ mddev->curr_resync = 0;
+ mddev->resync_mismatches = 0;
+ mddev->suspend_lo = mddev->suspend_hi = 0;
+ mddev->sync_speed_min = mddev->sync_speed_max = 0;
+ mddev->recovery = 0;
+ mddev->in_sync = 0;
+ mddev->changed = 0;
+ mddev->degraded = 0;
+ mddev->barriers_work = 0;
+ mddev->safemode = 0;
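+		/* With everything above reset, a fully stopped mddev looks
+		 * like a freshly allocated one, so no stale geometry or
+		 * recovery state leaks into a later assembly on this node.
+		 */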
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
printk(KERN_INFO "md: running: ");
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
char b[BDEVNAME_SIZE];
printk("<%s>", bdevname(rdev->bdev,b));
}
printk(KERN_INFO "md: considering %s ...\n",
bdevname(rdev0->bdev,b));
INIT_LIST_HEAD(&candidates);
- ITERATE_RDEV_PENDING(rdev,tmp)
+ rdev_for_each_list(rdev, tmp, pending_raid_disks)
if (super_90_load(rdev, rdev0, 0) >= 0) {
printk(KERN_INFO "md: adding %s ...\n",
bdevname(rdev->bdev,b));
mddev_unlock(mddev);
} else {
printk(KERN_INFO "md: created %s\n", mdname(mddev));
- ITERATE_RDEV_GENERIC(candidates,rdev,tmp) {
+ mddev->persistent = 1;
+ rdev_for_each_list(rdev, tmp, candidates) {
list_del_init(&rdev->same_set);
if (bind_rdev_to_array(rdev, mddev))
export_rdev(rdev);
/* on success, candidates will be empty, on error
* it won't...
*/
- ITERATE_RDEV_GENERIC(candidates,rdev,tmp)
+ rdev_for_each_list(rdev, tmp, candidates)
export_rdev(rdev);
mddev_put(mddev);
}
struct list_head *tmp;
nr=working=active=failed=spare=0;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
nr++;
if (test_bit(Faulty, &rdev->flags))
failed++;
else
rdev->raid_disk = -1;
- rdev->flags = 0;
-
if (rdev->raid_disk < mddev->raid_disks)
if (info->state & (1<<MD_DISK_SYNC))
set_bit(In_sync, &rdev->flags);
return 0;
busy:
- printk(KERN_WARNING "md: cannot remove active disk %s from %s ... \n",
+ printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
bdevname(rdev->bdev,b), mdname(mddev));
return -EBUSY;
}
*/
if (mddev->sync_thread)
return -EBUSY;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
sector_t avail;
avail = rdev->size * 2;
*/
/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
* RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
- if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
- && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
- && cmd != GET_BITMAP_FILE) {
+ if ((!mddev->raid_disks && !mddev->external)
+ && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
+ && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
+ && cmd != GET_BITMAP_FILE) {
err = -ENODEV;
goto abort_unlock;
}
if (!rdev || test_bit(Faulty, &rdev->flags))
return;
+
+ if (mddev->external)
+ set_bit(Blocked, &rdev->flags);
/*
dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
mdname(mddev),
seq_printf(seq, "unused devices: ");
- ITERATE_RDEV_PENDING(rdev,tmp) {
+ rdev_for_each_list(rdev, tmp, pending_raid_disks) {
char b[BDEVNAME_SIZE];
i++;
seq_printf(seq, "%s ",
if (mddev->ro==1)
seq_printf(seq, " (read-only)");
if (mddev->ro==2)
- seq_printf(seq, "(auto-read-only)");
+ seq_printf(seq, " (auto-read-only)");
seq_printf(seq, " %s", mddev->pers->name);
}
size = 0;
- ITERATE_RDEV(mddev,rdev,tmp2) {
+ rdev_for_each(rdev, tmp2, mddev) {
char b[BDEVNAME_SIZE];
seq_printf(seq, " %s[%d]",
bdevname(rdev->bdev,b), rdev->desc_nr);
chunk_kb ? "KB" : "B");
if (bitmap->file) {
seq_printf(seq, ", file: ");
- seq_path(seq, bitmap->file->f_path.mnt,
- bitmap->file->f_path.dentry," \t\n");
+ seq_path(seq, &bitmap->file->f_path, " \t\n");
}
seq_printf(seq, "\n");
long curr_events;
idle = 1;
- ITERATE_RDEV(mddev,rdev,tmp) {
+ rdev_for_each(rdev, tmp, mddev) {
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
curr_events = disk_stat_read(disk, sectors[0]) +
disk_stat_read(disk, sectors[1]) -
mddev->ro = 0;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(mddev->sync_thread);
}
atomic_inc(&mddev->writes_pending);
+ if (mddev->safemode == 1)
+ mddev->safemode = 0;
if (mddev->in_sync) {
spin_lock_irq(&mddev->write_lock);
if (mddev->in_sync) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto skip;
}
- ITERATE_MDDEV(mddev2,tmp) {
+ for_each_mddev(mddev2, tmp) {
if (mddev2 == mddev)
continue;
if (mddev2->curr_resync &&
/* recovery follows the physical size of devices */
max_sectors = mddev->size << 1;
j = MaxSector;
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
sector_t sectors;
skipped = 0;
+ if (j >= mddev->resync_max) {
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ wait_event(mddev->recovery_wait,
+ mddev->resync_max > j
+ || kthread_should_stop());
+ }
+ if (kthread_should_stop())
+ goto interrupted;
sectors = mddev->pers->sync_request(mddev, j, &skipped,
- currspeed < speed_min(mddev));
+ currspeed < speed_min(mddev));
if (sectors == 0) {
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
goto out;
}
- if (kthread_should_stop()) {
- /*
- * got a signal, exit.
- */
- printk(KERN_INFO
- "md: md_do_sync() got signal ... exiting\n");
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- goto out;
- }
+ if (kthread_should_stop())
+ goto interrupted;
+
/*
* this loop exits only if either when we are slower than
} else {
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
mddev->curr_resync = MaxSector;
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
skip:
mddev->curr_resync = 0;
+ mddev->resync_max = MaxSector;
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wake_up(&resync_wait);
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread);
+ return;
+
+ interrupted:
+ /*
+ * got a signal, exit.
+ */
+ printk(KERN_INFO
+ "md: md_do_sync() got signal ... exiting\n");
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ goto out;
+
}
EXPORT_SYMBOL_GPL(md_do_sync);
struct list_head *rtmp;
int spares = 0;
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk >= 0 &&
+ !test_bit(Blocked, &rdev->flags) &&
(test_bit(Faulty, &rdev->flags) ||
! test_bit(In_sync, &rdev->flags)) &&
atomic_read(&rdev->nr_pending)==0) {
}
if (mddev->degraded) {
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk < 0
&& !test_bit(Faulty, &rdev->flags)) {
rdev->recovery_offset = 0;
return;
if (signal_pending(current)) {
- if (mddev->pers->sync_request) {
+ if (mddev->pers->sync_request && !mddev->external) {
printk(KERN_INFO "md: %s in immediate safe mode\n",
mdname(mddev));
mddev->safemode = 2;
(mddev->flags && !mddev->external) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
- (mddev->safemode == 1) ||
+ (mddev->external == 0 && mddev->safemode == 1) ||
(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
&& !mddev->in_sync && mddev->recovery_cp == MaxSector)
))
if (mddev_trylock(mddev)) {
int spares = 0;
- spin_lock_irq(&mddev->write_lock);
- if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
- !mddev->in_sync && mddev->recovery_cp == MaxSector) {
- mddev->in_sync = 1;
- if (mddev->persistent)
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ if (!mddev->external) {
+ spin_lock_irq(&mddev->write_lock);
+ if (mddev->safemode &&
+ !atomic_read(&mddev->writes_pending) &&
+ !mddev->in_sync &&
+ mddev->recovery_cp == MaxSector) {
+ mddev->in_sync = 1;
+ if (mddev->persistent)
+ set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ }
+ if (mddev->safemode == 1)
+ mddev->safemode = 0;
+ spin_unlock_irq(&mddev->write_lock);
}
- if (mddev->safemode == 1)
- mddev->safemode = 0;
- spin_unlock_irq(&mddev->write_lock);
if (mddev->flags)
md_update_sb(mddev, 0);
* information must be scrapped
*/
if (!mddev->degraded)
- ITERATE_RDEV(mddev,rdev,rtmp)
+ rdev_for_each(rdev, rtmp, mddev)
rdev->saved_raid_disk = -1;
mddev->recovery = 0;
}
}
+void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+ sysfs_notify(&rdev->kobj, NULL, "state");
+ wait_event_timeout(rdev->blocked_wait,
+ !test_bit(Blocked, &rdev->flags),
+ msecs_to_jiffies(5000));
+ rdev_dec_pending(rdev, mddev);
+}
+EXPORT_SYMBOL(md_wait_for_blocked_rdev);
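+/*
+ * Usage sketch (illustrative): a personality that hits a write error on
+ * an array with externally managed metadata can park until userspace
+ * acknowledges the failure by writing "-blocked" to the rdev's state
+ * file:
+ *
+ *	if (test_bit(Blocked, &rdev->flags)) {
+ *		atomic_inc(&rdev->nr_pending);	// the helper drops this ref
+ *		md_wait_for_blocked_rdev(rdev, mddev);
+ *	}
+ *
+ * The wait also gives up after a 5 second timeout.
+ */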
+
static int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x)
{
printk(KERN_INFO "md: stopping all md devices.\n");
- ITERATE_MDDEV(mddev,tmp)
+ for_each_mddev(mddev, tmp)
if (mddev_trylock(mddev)) {
do_md_stop(mddev, 1);
mddev_unlock(mddev);
static void md_geninit(void)
{
- struct proc_dir_entry *p;
-
dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
- p = create_proc_entry("mdstat", S_IRUGO, NULL);
- if (p)
- p->proc_fops = &md_seq_fops;
+ proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}
static int __init md_init(void)
MD_BUG();
continue;
}
+ set_bit(AutoDetected, &rdev->flags);
list_add(&rdev->same_set, &pending_raid_disks);
i_passed++;
}
unregister_reboot_notifier(&md_notifier);
unregister_sysctl_table(raid_table_header);
remove_proc_entry("mdstat", NULL);
- ITERATE_MDDEV(mddev,tmp) {
+ for_each_mddev(mddev, tmp) {
struct gendisk *disk = mddev->gendisk;
if (!disk)
continue;