md: handle_stripe5 - add request/completion logic for async expand ops
Dan Williams [Tue, 2 Jan 2007 20:52:31 +0000 (13:52 -0700)]
When a stripe is being expanded, bulk copying takes place to move the data
from the old stripe to the new.  Since raid5_run_ops only operates on one
stripe at a time, these bulk copies are handled in-line under the stripe
lock.  In the DMA offload case we poll for the completion of the operation.

After the data has been copied into the new stripe, the parity needs to be
recalculated across the new disks.  We reuse the existing postxor
functionality to carry out this calculation.  By setting STRIPE_OP_POSTXOR
without setting STRIPE_OP_BIODRAIN, the completion path in handle_stripe5
can differentiate expand operations from normal write operations.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>

drivers/md/raid5.c

index a33dac7..c6e0e2b 100644 (file)
@@ -2653,6 +2653,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
        /* We have read all the blocks in this stripe and now we need to
         * copy some of them into a target stripe for expand.
         */
+       struct dma_async_tx_descriptor *tx = NULL;
        clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
        for (i = 0; i < sh->disks; i++)
                if (i != sh->pd_idx && (r6s && i != r6s->qd_idx)) {
@@ -2678,9 +2679,12 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
                                release_stripe(sh2);
                                continue;
                        }
-                       memcpy(page_address(sh2->dev[dd_idx].page),
-                              page_address(sh->dev[i].page),
-                              STRIPE_SIZE);
+
+                       /* place all the copies on one channel */
+                       tx = async_memcpy(sh2->dev[dd_idx].page,
+                               sh->dev[i].page, 0, 0, STRIPE_SIZE,
+                               ASYNC_TX_DEP_ACK, tx, NULL, NULL);
+
                        set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
                        set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
                        for (j = 0; j < conf->raid_disks; j++)
@@ -2693,6 +2697,12 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
                                set_bit(STRIPE_HANDLE, &sh2->state);
                        }
                        release_stripe(sh2);
+
+                       /* done submitting copies, wait for them to complete */
+                       if (i + 1 >= sh->disks) {
+                               async_tx_ack(tx);
+                               dma_wait_for_async_tx(tx);
+                       }
                }
 }
 
@@ -2931,18 +2941,34 @@ static void handle_stripe5(struct stripe_head *sh)
                }
        }
 
-       if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
-               /* Need to write out all blocks after computing parity */
-               sh->disks = conf->raid_disks;
-               sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks);
-               compute_parity5(sh, RECONSTRUCT_WRITE);
+       /* Finish postxor operations initiated by the expansion
+        * process
+        */
+       if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) &&
+               !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) {
+
+               clear_bit(STRIPE_EXPANDING, &sh->state);
+
+               clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
+               clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
+               clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
+
                for (i = conf->raid_disks; i--; ) {
-                       set_bit(R5_LOCKED, &sh->dev[i].flags);
-                       s.locked++;
                        set_bit(R5_Wantwrite, &sh->dev[i].flags);
+                       if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
+                               sh->ops.count++;
                }
-               clear_bit(STRIPE_EXPANDING, &sh->state);
-       } else if (s.expanded) {
+       }
+
+       if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+               !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
+               /* Need to write out all blocks after computing parity */
+               sh->disks = conf->raid_disks;
+               sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
+                       conf->raid_disks);
+               s.locked += handle_write_operations5(sh, 0, 1);
+       } else if (s.expanded &&
+               !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
                clear_bit(STRIPE_EXPAND_READY, &sh->state);
                atomic_dec(&conf->reshape_stripes);
                wake_up(&conf->wait_for_overlap);