const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
#endif
+/*
+ * We maintain a biased count of active stripes in the bottom 16 bits of
+ * bi_phys_segments, and a count of processed stripes in the upper 16 bits
+ */
+static inline int raid5_bi_phys_segments(struct bio *bio)
+{
+ return bio->bi_phys_segments & 0xffff;
+}
+
+static inline int raid5_bi_hw_segments(struct bio *bio)
+{
+ return (bio->bi_phys_segments >> 16) & 0xffff;
+}
+
+static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+{
+ --bio->bi_phys_segments;
+ return raid5_bi_phys_segments(bio);
+}
+
+static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+{
+ unsigned short val = raid5_bi_hw_segments(bio);
+
+ --val;
+ bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
+ return val;
+}
+
+static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+{
+ bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16);
+}
+
static inline int raid6_next_disk(int disk, int raid_disks)
{
disk++;
while (rbi && rbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
rbi2 = r5_next_bio(rbi, dev->sector);
- if (--rbi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(rbi)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
release_stripe(sh);
}
-static struct dma_async_tx_descriptor *
-ops_run_compute5(struct stripe_head *sh, unsigned long ops_request)
+static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
{
/* kernel stack size limits the total number of disks */
int disks = sh->disks;
ASYNC_TX_XOR_ZERO_DST, NULL,
ops_complete_compute5, sh);
- /* ack now if postxor is not set to be run */
- if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
- async_tx_ack(tx);
-
return tx;
}
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
/* Only process blocks that are known to be uptodate */
- if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags))
+ if (test_bit(R5_Wantdrain, &dev->flags))
xor_srcs[count++] = dev->page;
}
}
static struct dma_async_tx_descriptor *
-ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
- unsigned long ops_request)
+ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
{
int disks = sh->disks;
- int pd_idx = sh->pd_idx, i;
-
- /* check if prexor is active which means only process blocks
- * that are part of a read-modify-write (Wantprexor)
- */
- int prexor = test_bit(STRIPE_OP_PREXOR, &ops_request);
+ int i;
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
struct bio *chosen;
- int towrite;
- towrite = 0;
- if (prexor) { /* rmw */
- if (dev->towrite &&
- test_bit(R5_Wantprexor, &dev->flags))
- towrite = 1;
- } else { /* rcw */
- if (i != pd_idx && dev->towrite &&
- test_bit(R5_LOCKED, &dev->flags))
- towrite = 1;
- }
-
- if (towrite) {
+ if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
struct bio *wbi;
spin_lock(&sh->lock);
static void ops_complete_postxor(void *stripe_head_ref)
{
struct stripe_head *sh = stripe_head_ref;
-
- pr_debug("%s: stripe %llu\n", __func__,
- (unsigned long long)sh->sector);
-
- sh->reconstruct_state = reconstruct_state_result;
- set_bit(STRIPE_HANDLE, &sh->state);
- release_stripe(sh);
-}
-
-static void ops_complete_write(void *stripe_head_ref)
-{
- struct stripe_head *sh = stripe_head_ref;
int disks = sh->disks, i, pd_idx = sh->pd_idx;
pr_debug("%s: stripe %llu\n", __func__,
set_bit(R5_UPTODATE, &dev->flags);
}
- sh->reconstruct_state = reconstruct_state_drain_result;
+ if (sh->reconstruct_state == reconstruct_state_drain_run)
+ sh->reconstruct_state = reconstruct_state_drain_result;
+ else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run)
+ sh->reconstruct_state = reconstruct_state_prexor_drain_result;
+ else {
+ BUG_ON(sh->reconstruct_state != reconstruct_state_run);
+ sh->reconstruct_state = reconstruct_state_result;
+ }
+
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
}
static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
- unsigned long ops_request)
+ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
{
/* kernel stack size limits the total number of disks */
int disks = sh->disks;
int count = 0, pd_idx = sh->pd_idx, i;
struct page *xor_dest;
- int prexor = test_bit(STRIPE_OP_PREXOR, &ops_request);
+ int prexor = 0;
unsigned long flags;
- dma_async_tx_callback callback;
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
/* check if prexor is active which means only process blocks
* that are part of a read-modify-write (written)
*/
- if (prexor) {
+ if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
+ prexor = 1;
xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
}
}
- /* check whether this postxor is part of a write */
- callback = test_bit(STRIPE_OP_BIODRAIN, &ops_request) ?
- ops_complete_write : ops_complete_postxor;
-
/* 1/ if we prexor'd then the dest is reused as a source
* 2/ if we did not prexor then we are redoing the parity
* set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
if (unlikely(count == 1)) {
flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
- flags, tx, callback, sh);
+ flags, tx, ops_complete_postxor, sh);
} else
tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
- flags, tx, callback, sh);
+ flags, tx, ops_complete_postxor, sh);
}
static void ops_complete_check(void *stripe_head_ref)
overlap_clear++;
}
- if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request))
- tx = ops_run_compute5(sh, ops_request);
+ if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
+ tx = ops_run_compute5(sh);
+ /* terminate the chain if postxor is not set to be run */
+ if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
+ async_tx_ack(tx);
+ }
if (test_bit(STRIPE_OP_PREXOR, &ops_request))
tx = ops_run_prexor(sh, tx);
if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
- tx = ops_run_biodrain(sh, tx, ops_request);
+ tx = ops_run_biodrain(sh, tx);
overlap_clear++;
}
if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
- ops_run_postxor(sh, tx, ops_request);
+ ops_run_postxor(sh, tx);
if (test_bit(STRIPE_OP_CHECK, &ops_request))
ops_run_check(sh);
struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes);
struct disk_info *ndisks;
- int err = 0;
+ int err;
struct kmem_cache *sc;
int i;
if (newsize <= conf->pool_size)
return 0; /* never bother to shrink */
- md_allow_write(conf->mddev);
+ err = md_allow_write(conf->mddev);
+ if (err)
+ return err;
/* Step 1 */
sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
}
static void
-handle_write_operations5(struct stripe_head *sh, struct stripe_head_state *s,
+schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
int rcw, int expand)
{
int i, pd_idx = sh->pd_idx, disks = sh->disks;
if (dev->towrite) {
set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantdrain, &dev->flags);
if (!expand)
clear_bit(R5_UPTODATE, &dev->flags);
s->locked++;
BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
- sh->reconstruct_state = reconstruct_state_drain_run;
+ sh->reconstruct_state = reconstruct_state_prexor_drain_run;
set_bit(STRIPE_OP_PREXOR, &s->ops_request);
set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
if (i == pd_idx)
continue;
- /* For a read-modify write there may be blocks that are
- * locked for reading while others are ready to be
- * written so we distinguish these blocks by the
- * R5_Wantprexor bit
- */
if (dev->towrite &&
(test_bit(R5_UPTODATE, &dev->flags) ||
- test_bit(R5_Wantcompute, &dev->flags))) {
- set_bit(R5_Wantprexor, &dev->flags);
+ test_bit(R5_Wantcompute, &dev->flags))) {
+ set_bit(R5_Wantdrain, &dev->flags);
set_bit(R5_LOCKED, &dev->flags);
clear_bit(R5_UPTODATE, &dev->flags);
s->locked++;
if (*bip)
bi->bi_next = *bip;
*bip = bi;
- bi->bi_phys_segments ++;
+ bi->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock);
spin_unlock(&sh->lock);
}
static void
-handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
+handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
struct stripe_head_state *s, int disks,
struct bio **return_bi)
{
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(bi)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(bi)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
struct bio *nextbi =
r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(bi)) {
bi->bi_next = *return_bi;
*return_bi = bi;
}
md_wakeup_thread(conf->mddev->thread);
}
-/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks
- * to process
+/* fetch_block5 - checks the given member device to see if its data needs
+ * to be read or computed to satisfy a request.
+ *
+ * Returns 1 when no more member devices need to be checked, otherwise returns
+ * 0 to tell the loop in handle_stripe_fill5 to continue
*/
-static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
- struct stripe_head_state *s, int disk_idx, int disks)
+static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
+ int disk_idx, int disks)
{
struct r5dev *dev = &sh->dev[disk_idx];
struct r5dev *failed_dev = &sh->dev[s->failed_num];
/* is the data in this block needed, and can we get it? */
if (!test_bit(R5_LOCKED, &dev->flags) &&
- !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread ||
- (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
- s->syncing || s->expanding || (s->failed &&
- (failed_dev->toread || (failed_dev->towrite &&
- !test_bit(R5_OVERWRITE, &failed_dev->flags)
- ))))) {
+ !test_bit(R5_UPTODATE, &dev->flags) &&
+ (dev->toread ||
+ (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+ s->syncing || s->expanding ||
+ (s->failed &&
+ (failed_dev->toread ||
+ (failed_dev->towrite &&
+ !test_bit(R5_OVERWRITE, &failed_dev->flags)))))) {
/* We would like to get this block, possibly by computing it,
* otherwise read it if the backing disk is insync
*/
* subsequent operation.
*/
s->uptodate++;
- return 0; /* uptodate + compute == disks */
+ return 1; /* uptodate + compute == disks */
} else if (test_bit(R5_Insync, &dev->flags)) {
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
}
}
- return ~0;
+ return 0;
}
-static void handle_issuing_new_read_requests5(struct stripe_head *sh,
+/**
+ * handle_stripe_fill5 - read or compute data to satisfy pending requests.
+ */
+static void handle_stripe_fill5(struct stripe_head *sh,
struct stripe_head_state *s, int disks)
{
int i;
* midst of changing due to a write
*/
if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
- !sh->reconstruct_state) {
+ !sh->reconstruct_state)
for (i = disks; i--; )
- if (__handle_issuing_new_read_requests5(
- sh, s, i, disks) == 0)
+ if (fetch_block5(sh, s, i, disks))
break;
- }
set_bit(STRIPE_HANDLE, &sh->state);
}
-static void handle_issuing_new_read_requests6(struct stripe_head *sh,
+static void handle_stripe_fill6(struct stripe_head *sh,
struct stripe_head_state *s, struct r6_state *r6s,
int disks)
{
}
-/* handle_completed_write_requests
+/* handle_stripe_clean_event
* any written block on an uptodate or failed drive can be returned.
* Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
* never LOCKED, so we don't need to test 'failed' directly.
*/
-static void handle_completed_write_requests(raid5_conf_t *conf,
+static void handle_stripe_clean_event(raid5_conf_t *conf,
struct stripe_head *sh, int disks, struct bio **return_bi)
{
int i;
while (wbi && wbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
wbi2 = r5_next_bio(wbi, dev->sector);
- if (--wbi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(wbi)) {
md_write_end(conf->mddev);
wbi->bi_next = *return_bi;
*return_bi = wbi;
md_wakeup_thread(conf->mddev->thread);
}
-static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
+static void handle_stripe_dirtying5(raid5_conf_t *conf,
struct stripe_head *sh, struct stripe_head_state *s, int disks)
{
int rmw = 0, rcw = 0, i;
if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
(s->locked == 0 && (rcw == 0 || rmw == 0) &&
!test_bit(STRIPE_BIT_DELAY, &sh->state)))
- handle_write_operations5(sh, s, rcw == 0, 0);
+ schedule_reconstruction5(sh, s, rcw == 0, 0);
}
-static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
+static void handle_stripe_dirtying6(raid5_conf_t *conf,
struct stripe_head *sh, struct stripe_head_state *s,
struct r6_state *r6s, int disks)
{
*
*/
-static void handle_stripe5(struct stripe_head *sh)
+static bool handle_stripe5(struct stripe_head *sh)
{
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i;
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ if (blocked_rdev == NULL &&
+ rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
blocked_rdev = rdev;
atomic_inc(&rdev->nr_pending);
- break;
}
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
rcu_read_unlock();
if (unlikely(blocked_rdev)) {
- set_bit(STRIPE_HANDLE, &sh->state);
- goto unlock;
+ if (s.syncing || s.expanding || s.expanded ||
+ s.to_write || s.written) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
+ /* There is nothing for the blocked_rdev to block */
+ rdev_dec_pending(blocked_rdev, conf->mddev);
+ blocked_rdev = NULL;
}
if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
* need to be failed
*/
if (s.failed > 1 && s.to_read+s.to_write+s.written)
- handle_requests_to_failed_array(conf, sh, &s, disks,
- &return_bi);
+ handle_failed_stripe(conf, sh, &s, disks, &return_bi);
if (s.failed > 1 && s.syncing) {
md_done_sync(conf->mddev, STRIPE_SECTORS,0);
clear_bit(STRIPE_SYNCING, &sh->state);
!test_bit(R5_LOCKED, &dev->flags) &&
test_bit(R5_UPTODATE, &dev->flags)) ||
(s.failed == 1 && s.failed_num == sh->pd_idx)))
- handle_completed_write_requests(conf, sh, disks, &return_bi);
+ handle_stripe_clean_event(conf, sh, disks, &return_bi);
/* Now we might consider reading some blocks, either to check/generate
* parity, or to satisfy requests
*/
if (s.to_read || s.non_overwrite ||
(s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
- handle_issuing_new_read_requests5(sh, &s, disks);
+ handle_stripe_fill5(sh, &s, disks);
/* Now we check to see if any write operations have recently
* completed
*/
prexor = 0;
- if (sh->reconstruct_state == reconstruct_state_drain_result) {
+ if (sh->reconstruct_state == reconstruct_state_prexor_drain_result)
+ prexor = 1;
+ if (sh->reconstruct_state == reconstruct_state_drain_result ||
+ sh->reconstruct_state == reconstruct_state_prexor_drain_result) {
sh->reconstruct_state = reconstruct_state_idle;
- for (i = disks; i--; )
- prexor += test_and_clear_bit(R5_Wantprexor,
- &sh->dev[i].flags);
/* All the 'written' buffers and the parity block are ready to
* be written back to disk
* block.
*/
if (s.to_write && !sh->reconstruct_state && !sh->check_state)
- handle_issuing_new_write_requests5(conf, sh, &s, disks);
+ handle_stripe_dirtying5(conf, sh, &s, disks);
/* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough
if (sh->reconstruct_state == reconstruct_state_result) {
sh->reconstruct_state = reconstruct_state_idle;
clear_bit(STRIPE_EXPANDING, &sh->state);
- for (i = conf->raid_disks; i--; )
+ for (i = conf->raid_disks; i--; ) {
set_bit(R5_Wantwrite, &sh->dev[i].flags);
- set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
s.locked++;
+ }
}
if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
sh->disks = conf->raid_disks;
sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
conf->raid_disks);
- handle_write_operations5(sh, &s, 1, 1);
+ schedule_reconstruction5(sh, &s, 1, 1);
} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
ops_run_io(sh, &s);
return_io(return_bi);
+
+ return blocked_rdev == NULL;
}
-static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
{
raid6_conf_t *conf = sh->raid_conf;
int disks = sh->disks;
copy_data(0, rbi, dev->page, dev->sector);
rbi2 = r5_next_bio(rbi, dev->sector);
spin_lock_irq(&conf->device_lock);
- if (--rbi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(rbi)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ if (blocked_rdev == NULL &&
+ rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
blocked_rdev = rdev;
atomic_inc(&rdev->nr_pending);
- break;
}
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
rcu_read_unlock();
if (unlikely(blocked_rdev)) {
- set_bit(STRIPE_HANDLE, &sh->state);
- goto unlock;
+ if (s.syncing || s.expanding || s.expanded ||
+ s.to_write || s.written) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
+ /* There is nothing for the blocked_rdev to block */
+ rdev_dec_pending(blocked_rdev, conf->mddev);
+ blocked_rdev = NULL;
}
+
pr_debug("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
* might need to be failed
*/
if (s.failed > 2 && s.to_read+s.to_write+s.written)
- handle_requests_to_failed_array(conf, sh, &s, disks,
- &return_bi);
+ handle_failed_stripe(conf, sh, &s, disks, &return_bi);
if (s.failed > 2 && s.syncing) {
md_done_sync(conf->mddev, STRIPE_SECTORS,0);
clear_bit(STRIPE_SYNCING, &sh->state);
( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
&& !test_bit(R5_LOCKED, &qdev->flags)
&& test_bit(R5_UPTODATE, &qdev->flags)))))
- handle_completed_write_requests(conf, sh, disks, &return_bi);
+ handle_stripe_clean_event(conf, sh, disks, &return_bi);
/* Now we might consider reading some blocks, either to check/generate
* parity, or to satisfy requests
*/
if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
(s.syncing && (s.uptodate < disks)) || s.expanding)
- handle_issuing_new_read_requests6(sh, &s, &r6s, disks);
+ handle_stripe_fill6(sh, &s, &r6s, disks);
/* now to consider writing and what else, if anything should be read */
if (s.to_write)
- handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks);
+ handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
/* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough
ops_run_io(sh, &s);
return_io(return_bi);
+
+ return blocked_rdev == NULL;
}
-static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+/* returns true if the stripe was handled */
+static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
{
if (sh->raid_conf->level == 6)
- handle_stripe6(sh, tmp_page);
+ return handle_stripe6(sh, tmp_page);
else
- handle_stripe5(sh);
+ return handle_stripe5(sh);
}
/* We want read requests to align with chunks where possible,
* but write requests don't need to.
*/
-static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
+static int raid5_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
+ unsigned int bio_sectors = bvm->bi_size >> 9;
- if (bio_data_dir(bio) == WRITE)
+ if ((bvm->bi_rw & 1) == WRITE)
return biovec->bv_len; /* always allow writes to be mergeable */
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if(bi) {
conf->retry_read_aligned_list = bi->bi_next;
bi->bi_next = NULL;
+ /*
+ * this sets the active strip count to 1 and the processed
+ * strip count to zero (upper 8 bits)
+ */
bi->bi_phys_segments = 1; /* biased count of active stripes */
- bi->bi_hw_segments = 0; /* count of processed stripes */
}
return bi;
if ((bi->bi_size>>9) > q->max_sectors)
return 0;
blk_recount_segments(q, bi);
- if (bi->bi_phys_segments > q->max_phys_segments ||
- bi->bi_hw_segments > q->max_hw_segments)
+ if (bi->bi_phys_segments > q->max_phys_segments)
return 0;
if (q->merge_bvec_fn)
}
spin_lock_irq(&conf->device_lock);
- remaining = --bi->bi_phys_segments;
+ remaining = raid5_dec_bi_phys_segments(bi);
spin_unlock_irq(&conf->device_lock);
if (remaining == 0) {
j == raid6_next_disk(sh->pd_idx, sh->disks))
continue;
s = compute_blocknr(sh, j);
- if (s < (mddev->array_size<<1)) {
+ if (s < mddev->array_sectors) {
skipped = 1;
continue;
}
clear_bit(STRIPE_INSYNC, &sh->state);
spin_unlock(&sh->lock);
- handle_stripe(sh, NULL);
+ /* wait for any blocked device to be handled */
+ while(unlikely(!handle_stripe(sh, NULL)))
+ ;
release_stripe(sh);
return STRIPE_SECTORS;
sector += STRIPE_SECTORS,
scnt++) {
- if (scnt < raid_bio->bi_hw_segments)
+ if (scnt < raid5_bi_hw_segments(raid_bio))
/* already done this stripe */
continue;
if (!sh) {
/* failed to get a stripe - must wait */
- raid_bio->bi_hw_segments = scnt;
+ raid5_set_bi_hw_segments(raid_bio, scnt);
conf->retry_read_aligned = raid_bio;
return handled;
}
set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
release_stripe(sh);
- raid_bio->bi_hw_segments = scnt;
+ raid5_set_bi_hw_segments(raid_bio, scnt);
conf->retry_read_aligned = raid_bio;
return handled;
}
handled++;
}
spin_lock_irq(&conf->device_lock);
- remaining = --raid_bio->bi_phys_segments;
+ remaining = raid5_dec_bi_phys_segments(raid_bio);
spin_unlock_irq(&conf->device_lock);
if (remaining == 0)
bio_endio(raid_bio, 0);
sh = __get_priority_stripe(conf);
- if (!sh) {
- async_tx_issue_pending_all();
+ if (!sh)
break;
- }
spin_unlock_irq(&conf->device_lock);
handled++;
spin_unlock_irq(&conf->device_lock);
+ async_tx_issue_pending_all();
unplug_slaves(mddev);
pr_debug("--- raid5d inactive\n");
{
raid5_conf_t *conf = mddev_to_conf(mddev);
unsigned long new;
+ int err;
+
if (len >= PAGE_SIZE)
return -EINVAL;
if (!conf)
else
break;
}
- md_allow_write(mddev);
+ err = md_allow_write(mddev);
+ if (err)
+ return err;
while (new > conf->max_nr_stripes) {
if (grow_one_stripe(conf))
conf->max_nr_stripes++;
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
- mddev->array_size = mddev->size * (conf->previous_raid_disks -
+ mddev->array_sectors = 2 * mddev->size * (conf->previous_raid_disks -
conf->max_degraded);
blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
raid5_conf_t *conf = mddev_to_conf(mddev);
sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
- mddev->array_size = (sectors * (mddev->raid_disks-conf->max_degraded))>>1;
- set_capacity(mddev->gendisk, mddev->array_size << 1);
+ mddev->array_sectors = sectors * (mddev->raid_disks
+ - conf->max_degraded);
+ set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1;
if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) {
mddev->recovery_cp = mddev->size << 1;
return -EINVAL; /* Cannot shrink array or change level yet */
if (mddev->delta_disks == 0)
return 0; /* nothing to do */
+ if (mddev->bitmap)
+ /* Cannot grow a bitmap yet */
+ return -EBUSY;
/* Can only proceed if there are plenty of stripe_heads.
* We need a minimum of one full stripe,, and for sensible progress
struct block_device *bdev;
if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
- conf->mddev->array_size = conf->mddev->size *
+ conf->mddev->array_sectors = 2 * conf->mddev->size *
(conf->raid_disks - conf->max_degraded);
- set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1);
+ set_capacity(conf->mddev->gendisk, conf->mddev->array_sectors);
conf->mddev->changed = 1;
bdev = bdget_disk(conf->mddev->gendisk, 0);
if (bdev) {
mutex_lock(&bdev->bd_inode->i_mutex);
- i_size_write(bdev->bd_inode, (loff_t)conf->mddev->array_size << 10);
+ i_size_write(bdev->bd_inode,
+ (loff_t)conf->mddev->array_sectors << 9);
mutex_unlock(&bdev->bd_inode->i_mutex);
bdput(bdev);
}