block: separate priority boosting from REQ_META
Christoph Hellwig [Tue, 23 Aug 2011 12:50:29 +0000 (14:50 +0200)]
Add a new REQ_PRIO to let requests preempt others in the cfq I/O schedule,
and lave REQ_META purely for marking requests as metadata in blktrace.

All existing callers of REQ_META except for XFS are updated to also
set REQ_PRIO for now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

block/cfq-iosched.c
drivers/mmc/card/block.c
fs/ext3/inode.c
fs/ext3/namei.c
fs/ext4/inode.c
fs/ext4/namei.c
fs/gfs2/log.c
fs/gfs2/meta_io.c
fs/gfs2/ops_fstype.c
fs/gfs2/quota.c
include/linux/blk_types.h

index a33bd43..16ace89 100644 (file)
@@ -130,8 +130,8 @@ struct cfq_queue {
        unsigned long slice_end;
        long slice_resid;
 
-       /* pending metadata requests */
-       int meta_pending;
+       /* pending priority requests */
+       int prio_pending;
        /* number of requests that are on the dispatch list or inside driver */
        int dispatched;
 
@@ -684,8 +684,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
        if (rq_is_sync(rq1) != rq_is_sync(rq2))
                return rq_is_sync(rq1) ? rq1 : rq2;
 
-       if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
-               return rq1->cmd_flags & REQ_META ? rq1 : rq2;
+       if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
+               return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
 
        s1 = blk_rq_pos(rq1);
        s2 = blk_rq_pos(rq2);
@@ -1612,9 +1612,9 @@ static void cfq_remove_request(struct request *rq)
        cfqq->cfqd->rq_queued--;
        cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
                                        rq_data_dir(rq), rq_is_sync(rq));
-       if (rq->cmd_flags & REQ_META) {
-               WARN_ON(!cfqq->meta_pending);
-               cfqq->meta_pending--;
+       if (rq->cmd_flags & REQ_PRIO) {
+               WARN_ON(!cfqq->prio_pending);
+               cfqq->prio_pending--;
        }
 }
 
@@ -3372,7 +3372,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
         * So both queues are sync. Let the new request get disk time if
         * it's a metadata request and the current queue is doing regular IO.
         */
-       if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
+       if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
                return true;
 
        /*
@@ -3439,8 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        struct cfq_io_context *cic = RQ_CIC(rq);
 
        cfqd->rq_queued++;
-       if (rq->cmd_flags & REQ_META)
-               cfqq->meta_pending++;
+       if (rq->cmd_flags & REQ_PRIO)
+               cfqq->prio_pending++;
 
        cfq_update_io_thinktime(cfqd, cfqq, cic);
        cfq_update_io_seektime(cfqd, cfqq, rq);
index 1ff5486..4c1a648 100644 (file)
@@ -926,6 +926,9 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
        /*
         * Reliable writes are used to implement Forced Unit Access and
         * REQ_META accesses, and are supported only on MMCs.
+        *
+        * XXX: this really needs a good explanation of why REQ_META
+        * is treated special.
         */
        bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
                          (req->cmd_flags & REQ_META)) &&
index bba7b96..12661e1 100644 (file)
@@ -1134,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
                return bh;
        if (buffer_uptodate(bh))
                return bh;
-       ll_rw_block(READ | REQ_META, 1, &bh);
+       ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return bh;
@@ -2807,7 +2807,7 @@ make_io:
                trace_ext3_load_inode(inode);
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
-               submit_bh(READ | REQ_META, bh);
+               submit_bh(READ | REQ_META | REQ_PRIO, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        ext3_error(inode->i_sb, "ext3_get_inode_loc",
index c7d4032..0629e09 100644 (file)
@@ -922,7 +922,8 @@ restart:
                                bh = ext3_getblk(NULL, dir, b++, 0, &err);
                                bh_use[ra_max] = bh;
                                if (bh)
-                                       ll_rw_block(READ | REQ_META, 1, &bh);
+                                       ll_rw_block(READ | REQ_META | REQ_PRIO,
+                                                   1, &bh);
                        }
                }
                if ((bh = bh_use[ra_ptr++]) == NULL)
index 1dfa18f..c7cbb3d 100644 (file)
@@ -650,7 +650,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
                return bh;
        if (buffer_uptodate(bh))
                return bh;
-       ll_rw_block(READ | REQ_META, 1, &bh);
+       ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return bh;
@@ -3301,7 +3301,7 @@ make_io:
                trace_ext4_load_inode(inode);
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
-               submit_bh(READ | REQ_META, bh);
+               submit_bh(READ | REQ_META | REQ_PRIO, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        EXT4_ERROR_INODE_BLOCK(inode, block,
index d36315a..1c924fa 100644 (file)
@@ -922,7 +922,8 @@ restart:
                                bh = ext4_getblk(NULL, dir, b++, 0, &err);
                                bh_use[ra_max] = bh;
                                if (bh)
-                                       ll_rw_block(READ | REQ_META, 1, &bh);
+                                       ll_rw_block(READ | REQ_META | REQ_PRIO,
+                                                   1, &bh);
                        }
                }
                if ((bh = bh_use[ra_ptr++]) == NULL)
index 85c6292..5986464 100644 (file)
@@ -624,9 +624,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
        bh->b_end_io = end_buffer_write_sync;
        get_bh(bh);
        if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
-               submit_bh(WRITE_SYNC | REQ_META, bh);
+               submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
        else
-               submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
+               submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh);
        wait_on_buffer(bh);
 
        if (!buffer_uptodate(bh))
index 747238c..be29858 100644 (file)
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 {
        struct buffer_head *bh, *head;
        int nr_underway = 0;
-       int write_op = REQ_META |
+       int write_op = REQ_META | REQ_PRIO |
                (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 
        BUG_ON(!PageLocked(page));
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
        }
        bh->b_end_io = end_buffer_read_sync;
        get_bh(bh);
-       submit_bh(READ_SYNC | REQ_META, bh);
+       submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
        if (!(flags & DIO_WAIT))
                return 0;
 
@@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
        if (buffer_uptodate(first_bh))
                goto out;
        if (!buffer_locked(first_bh))
-               ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
+               ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh);
 
        dblock++;
        extlen--;
index 3bc073a..079587e 100644 (file)
@@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
 
        bio->bi_end_io = end_bio_io_page;
        bio->bi_private = page;
-       submit_bio(READ_SYNC | REQ_META, bio);
+       submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio);
        wait_on_page_locked(page);
        bio_put(bio);
        if (!PageUptodate(page)) {
index 0534340..0e8bb13 100644 (file)
@@ -709,7 +709,7 @@ get_a_page:
                set_buffer_uptodate(bh);
 
        if (!buffer_uptodate(bh)) {
-               ll_rw_block(READ | REQ_META, 1, &bh);
+               ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh))
                        goto unlock_out;
index 32f0076..71fc53b 100644 (file)
@@ -124,6 +124,7 @@ enum rq_flag_bits {
 
        __REQ_SYNC,             /* request is sync (sync write or read) */
        __REQ_META,             /* metadata io request */
+       __REQ_PRIO,             /* boost priority in cfq */
        __REQ_DISCARD,          /* request to discard sectors */
        __REQ_SECURE,           /* secure discard (used with __REQ_DISCARD) */
 
@@ -161,14 +162,15 @@ enum rq_flag_bits {
 #define REQ_FAILFAST_DRIVER    (1 << __REQ_FAILFAST_DRIVER)
 #define REQ_SYNC               (1 << __REQ_SYNC)
 #define REQ_META               (1 << __REQ_META)
+#define REQ_PRIO               (1 << __REQ_PRIO)
 #define REQ_DISCARD            (1 << __REQ_DISCARD)
 #define REQ_NOIDLE             (1 << __REQ_NOIDLE)
 
 #define REQ_FAILFAST_MASK \
        (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
 #define REQ_COMMON_MASK \
-       (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \
-        REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
+       (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
+        REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
 #define REQ_CLONE_MASK         REQ_COMMON_MASK
 
 #define REQ_RAHEAD             (1 << __REQ_RAHEAD)