Merge branch 'md-raid6-accel' into ioat3.2
[linux-2.6.git] / drivers / md / raid5.c
index 364ea37..0a5cf21 100644 (file)
@@ -364,7 +364,7 @@ static void raid5_unplug_device(struct request_queue *q);
 
 static struct stripe_head *
 get_active_stripe(raid5_conf_t *conf, sector_t sector,
-                 int previous, int noblock)
+                 int previous, int noblock, int noquiesce)
 {
        struct stripe_head *sh;
 
@@ -374,7 +374,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
 
        do {
                wait_event_lock_irq(conf->wait_for_stripe,
-                                   conf->quiesce == 0,
+                                   conf->quiesce == 0 || noquiesce,
                                    conf->device_lock, /* nothing */);
                sh = __find_stripe(conf, sector, conf->generation - previous);
                if (!sh) {
@@ -2826,7 +2826,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
                        sector_t bn = compute_blocknr(sh, i, 1);
                        sector_t s = raid5_compute_sector(conf, bn, 0,
                                                          &dd_idx, NULL);
-                       sh2 = get_active_stripe(conf, s, 0, 1);
+                       sh2 = get_active_stripe(conf, s, 0, 1, 1);
                        if (sh2 == NULL)
                                /* so far only the early blocks of this stripe
                                 * have been requested.  When later blocks
@@ -3100,7 +3100,7 @@ static bool handle_stripe5(struct stripe_head *sh)
        /* Finish reconstruct operations initiated by the expansion process */
        if (sh->reconstruct_state == reconstruct_state_result) {
                struct stripe_head *sh2
-                       = get_active_stripe(conf, sh->sector, 1, 1);
+                       = get_active_stripe(conf, sh->sector, 1, 1, 1);
                if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
                        /* sh cannot be written until sh2 has been read.
                         * so arrange for sh to be delayed a little
@@ -3397,7 +3397,7 @@ static bool handle_stripe6(struct stripe_head *sh)
        if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
            !sh->reconstruct_state) {
                struct stripe_head *sh2
-                       = get_active_stripe(conf, sh->sector, 1, 1);
+                       = get_active_stripe(conf, sh->sector, 1, 1, 1);
                if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
                        /* sh cannot be written until sh2 has been read.
                         * so arrange for sh to be delayed a little
@@ -3491,7 +3491,7 @@ static void unplug_slaves(mddev_t *mddev)
        int i;
 
        rcu_read_lock();
-       for (i=0; i<mddev->raid_disks; i++) {
+       for (i = 0; i < conf->raid_disks; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
                if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
                        struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
@@ -3878,7 +3878,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
                        (unsigned long long)logical_sector);
 
                sh = get_active_stripe(conf, new_sector, previous,
-                                      (bi->bi_rw&RWA_MASK));
+                                      (bi->bi_rw&RWA_MASK), 0);
                if (sh) {
                        if (unlikely(previous)) {
                                /* expansion might have moved on while waiting for a
@@ -4014,13 +4014,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
        safepos = conf->reshape_safe;
        sector_div(safepos, data_disks);
        if (mddev->delta_disks < 0) {
-               writepos -= reshape_sectors;
+               writepos -= min_t(sector_t, reshape_sectors, writepos);
                readpos += reshape_sectors;
                safepos += reshape_sectors;
        } else {
                writepos += reshape_sectors;
-               readpos -= reshape_sectors;
-               safepos -= reshape_sectors;
+               readpos -= min_t(sector_t, reshape_sectors, readpos);
+               safepos -= min_t(sector_t, reshape_sectors, safepos);
        }
 
        /* 'writepos' is the most advanced device address we might write.
@@ -4048,6 +4048,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                wait_event(conf->wait_for_overlap,
                           atomic_read(&conf->reshape_stripes)==0);
                mddev->reshape_position = conf->reshape_progress;
+               mddev->curr_resync_completed = mddev->curr_resync;
                conf->reshape_checkpoint = jiffies;
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
@@ -4057,6 +4058,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                conf->reshape_safe = mddev->reshape_position;
                spin_unlock_irq(&conf->device_lock);
                wake_up(&conf->wait_for_overlap);
+               sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
 
        if (mddev->delta_disks < 0) {
@@ -4074,7 +4076,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
        for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
                int j;
                int skipped = 0;
-               sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
+               sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
                set_bit(STRIPE_EXPANDING, &sh->state);
                atomic_inc(&conf->reshape_stripes);
                /* If any of this stripe is beyond the end of the old
@@ -4117,13 +4119,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                raid5_compute_sector(conf, stripe_addr*(new_data_disks),
                                     1, &dd_idx, NULL);
        last_sector =
-               raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
+               raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
                                            *(new_data_disks) - 1),
                                     1, &dd_idx, NULL);
        if (last_sector >= mddev->dev_sectors)
                last_sector = mddev->dev_sectors - 1;
        while (first_sector <= last_sector) {
-               sh = get_active_stripe(conf, first_sector, 1, 0);
+               sh = get_active_stripe(conf, first_sector, 1, 0, 1);
                set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
                set_bit(STRIPE_HANDLE, &sh->state);
                release_stripe(sh);
@@ -4141,11 +4143,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
         * then we need to write out the superblock.
         */
        sector_nr += reshape_sectors;
-       if (sector_nr >= mddev->resync_max) {
+       if ((sector_nr - mddev->curr_resync_completed) * 2
+           >= mddev->resync_max - mddev->curr_resync_completed) {
                /* Cannot proceed until we've updated the superblock... */
                wait_event(conf->wait_for_overlap,
                           atomic_read(&conf->reshape_stripes) == 0);
                mddev->reshape_position = conf->reshape_progress;
+               mddev->curr_resync_completed = mddev->curr_resync;
                conf->reshape_checkpoint = jiffies;
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
@@ -4156,6 +4160,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                conf->reshape_safe = mddev->reshape_position;
                spin_unlock_irq(&conf->device_lock);
                wake_up(&conf->wait_for_overlap);
+               sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
        return reshape_sectors;
 }
@@ -4220,9 +4225,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 
        bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 
-       sh = get_active_stripe(conf, sector_nr, 0, 1);
+       sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
        if (sh == NULL) {
-               sh = get_active_stripe(conf, sector_nr, 0, 0);
+               sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
                /* make sure we don't swamp the stripe cache if someone else
                 * is trying to get access
                 */
@@ -4232,7 +4237,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
         * We don't need to check the 'failed' flag as when that gets set,
         * recovery aborts.
         */
-       for (i=0; i<mddev->raid_disks; i++)
+       for (i = 0; i < conf->raid_disks; i++)
                if (conf->disks[i].rdev == NULL)
                        still_degraded = 1;
 
@@ -4284,7 +4289,7 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
                        /* already done this stripe */
                        continue;
 
-               sh = get_active_stripe(conf, sector, 0, 1);
+               sh = get_active_stripe(conf, sector, 0, 1, 0);
 
                if (!sh) {
                        /* failed to get a stripe - must wait */