Merge branch 'cfq-2.6.33' into for-2.6.33
Jens Axboe [Tue, 3 Nov 2009 20:12:10 +0000 (21:12 +0100)]
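The bulk of this merge is the AIO request-batching work: io_submit() now records each distinct address space a submission touches and unplugs them once, after all iocbs are queued, instead of unplugging from the direct-IO path on every request. The supporting changes follow from that: O_DIRECT writes switch from the unplug-on-submit WRITE_ODIRECT (now removed) to WRITE_SYNC_PLUG, blk_run_backing_dev()/blk_run_address_space() move from blkdev.h to backing-dev.h, blockdev fsync gains a disk cache flush, drbd's in_flight accounting becomes per-direction, cfq's prio unboost drops two redundant compares, and bio.h's bvec_kmap_irq()/bvec_kunmap_irq() lose an unneeded __always_inline.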
block/cfq-iosched.c
drivers/block/drbd/drbd_req.c
fs/aio.c
fs/block_dev.c
fs/direct-io.c
include/linux/backing-dev.h
include/linux/bio.h
include/linux/blkdev.h
include/linux/fs.h

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index aa00d8f..8b5ba18 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2625,12 +2625,10 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
                        cfqq->ioprio = IOPRIO_NORM;
        } else {
                /*
-                * check if we need to unboost the queue
+                * unboost the queue (if needed)
                 */
-               if (cfqq->ioprio_class != cfqq->org_ioprio_class)
-                       cfqq->ioprio_class = cfqq->org_ioprio_class;
-               if (cfqq->ioprio != cfqq->org_ioprio)
-                       cfqq->ioprio = cfqq->org_ioprio;
+               cfqq->ioprio_class = cfqq->org_ioprio_class;
+               cfqq->ioprio = cfqq->org_ioprio;
        }
 }
 
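The unboost path previously compared ioprio_class and ioprio against their org_* values before writing them back; since storing an identical value is harmless, the two compares were pure overhead and the assignments are now unconditional.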
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index d3426ff..3678d3d 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -40,7 +40,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req
        part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
        part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
        part_stat_unlock();
-       mdev->vdisk->part0.in_flight++;
+       mdev->vdisk->part0.in_flight[rw]++;
 }
 
 /* Update disk stats when completing request upwards */
@@ -53,7 +53,7 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
        part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration);
        part_round_stats(cpu, &mdev->vdisk->part0);
        part_stat_unlock();
-       mdev->vdisk->part0.in_flight--;
+       mdev->vdisk->part0.in_flight[rw]--;
 }
 
 static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
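The drbd accounting now tracks reads and writes separately: part0.in_flight becomes an array indexed by the request direction rw (0 for reads, 1 for writes), matching the per-direction in-flight counters in the block core. A hedged illustration of the resulting shape, using a hypothetical stand-in struct rather than the real struct hd_struct:

    /* Hypothetical stand-in for the per-partition counters. */
    struct part_counters {
            unsigned int in_flight[2];      /* [0] = reads, [1] = writes */
    };

    /* The total in flight is now the sum over both directions. */
    static unsigned int total_in_flight(const struct part_counters *p)
    {
            return p->in_flight[0] + p->in_flight[1];
    }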
diff --git a/fs/aio.c b/fs/aio.c
index 02a2c93..c30dfc0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -15,6 +15,7 @@
 #include <linux/aio_abi.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/backing-dev.h>
 #include <linux/uio.h>
 
 #define DEBUG 0
@@ -32,6 +33,9 @@
 #include <linux/workqueue.h>
 #include <linux/security.h>
 #include <linux/eventfd.h>
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/hash.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -60,6 +64,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
+#define AIO_BATCH_HASH_BITS    3 /* allocated on-stack, so don't go crazy */
+#define AIO_BATCH_HASH_SIZE    (1 << AIO_BATCH_HASH_BITS)
+struct aio_batch_entry {
+       struct hlist_node list;
+       struct address_space *mapping;
+};
+mempool_t *abe_pool;
+
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
@@ -73,6 +85,8 @@ static int __init aio_setup(void)
        kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
        aio_wq = create_workqueue("aio");
+       abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
+       BUG_ON(!abe_pool);
 
        pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
 
@@ -1531,8 +1545,44 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
        return 1;
 }
 
+static void aio_batch_add(struct address_space *mapping,
+                         struct hlist_head *batch_hash)
+{
+       struct aio_batch_entry *abe;
+       struct hlist_node *pos;
+       unsigned bucket;
+
+       bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
+       hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
+               if (abe->mapping == mapping)
+                       return;
+       }
+
+       abe = mempool_alloc(abe_pool, GFP_KERNEL);
+       BUG_ON(!igrab(mapping->host));
+       abe->mapping = mapping;
+       hlist_add_head(&abe->list, &batch_hash[bucket]);
+       return;
+}
+
+static void aio_batch_free(struct hlist_head *batch_hash)
+{
+       struct aio_batch_entry *abe;
+       struct hlist_node *pos, *n;
+       int i;
+
+       for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
+               hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
+                       blk_run_address_space(abe->mapping);
+                       iput(abe->mapping->host);
+                       hlist_del(&abe->list);
+                       mempool_free(abe, abe_pool);
+               }
+       }
+}
+
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-                        struct iocb *iocb)
+                        struct iocb *iocb, struct hlist_head *batch_hash)
 {
        struct kiocb *req;
        struct file *file;
@@ -1608,6 +1658,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                        ;
        }
        spin_unlock_irq(&ctx->ctx_lock);
+       if (req->ki_opcode == IOCB_CMD_PREAD ||
+           req->ki_opcode == IOCB_CMD_PREADV ||
+           req->ki_opcode == IOCB_CMD_PWRITE ||
+           req->ki_opcode == IOCB_CMD_PWRITEV)
+               aio_batch_add(file->f_mapping, batch_hash);
+
        aio_put_req(req);       /* drop extra ref to req */
        return 0;
 
@@ -1635,6 +1691,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
        struct kioctx *ctx;
        long ret = 0;
        int i;
+       struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
 
        if (unlikely(nr < 0))
                return -EINVAL;
@@ -1666,10 +1723,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
                        break;
                }
 
-               ret = io_submit_one(ctx, user_iocb, &tmp);
+               ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
                if (ret)
                        break;
        }
+       aio_batch_free(batch_hash);
 
        put_ioctx(ctx);
        return i ? i : ret;
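The batching machinery above works per io_submit() call: an on-stack hash table (8 buckets, so the stack cost stays small) records each distinct address_space touched by a read/write iocb, aio_batch_add() pins the backing inode with igrab() so the mapping cannot disappear under the batch, and aio_batch_free() then unplugs every recorded mapping exactly once and drops the references. A self-contained userspace sketch of the same dedup-then-flush pattern; the names here (batch_add, batch_flush, bucket_of) are illustrative, the printf stands in for blk_run_address_space(), and the inode pinning is omitted:

    #include <stdint.h>
    #include <stdio.h>

    #define BATCH_HASH_BITS 3               /* small: the table lives on the stack */
    #define BATCH_HASH_SIZE (1 << BATCH_HASH_BITS)

    struct batch_entry {
            const void *mapping;            /* stands in for struct address_space * */
            struct batch_entry *next;
    };

    static unsigned int bucket_of(const void *p)
    {
            /* crude pointer hash; the kernel uses hash_ptr() from <linux/hash.h> */
            return (unsigned int)(((uintptr_t)p >> 4) & (BATCH_HASH_SIZE - 1));
    }

    /* Record a mapping once per batch; duplicates are dropped, as in aio_batch_add(). */
    static void batch_add(struct batch_entry *hash[], struct batch_entry *slot,
                          const void *mapping)
    {
            unsigned int b = bucket_of(mapping);
            const struct batch_entry *e;

            for (e = hash[b]; e; e = e->next)
                    if (e->mapping == mapping)
                            return;
            slot->mapping = mapping;
            slot->next = hash[b];
            hash[b] = slot;
    }

    /* Flush every recorded mapping exactly once, as in aio_batch_free(). */
    static void batch_flush(struct batch_entry *hash[])
    {
            const struct batch_entry *e;
            int i;

            for (i = 0; i < BATCH_HASH_SIZE; i++)
                    for (e = hash[i]; e; e = e->next)
                            printf("unplug mapping %p\n", (void *)e->mapping);
    }

    int main(void)
    {
            struct batch_entry *hash[BATCH_HASH_SIZE] = { 0 };
            struct batch_entry slots[3];
            int a, b;

            batch_add(hash, &slots[0], &a);
            batch_add(hash, &slots[1], &a); /* same mapping: ignored */
            batch_add(hash, &slots[2], &b);
            batch_flush(hash);              /* prints two unplugs, not three */
            return 0;
    }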
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9cf4b92..dde91e7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -405,7 +405,17 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
  
 static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
-       return sync_blockdev(I_BDEV(filp->f_mapping->host));
+       struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+       int error;
+
+       error = sync_blockdev(bdev);
+       if (error)
+               return error;
+       
+       error = blkdev_issue_flush(bdev, NULL);
+       if (error == -EOPNOTSUPP)
+               error = 0;
+       return error;
 }
 
 /*
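block_fsync() previously only pushed dirty pages to the device with sync_blockdev(); it now also asks the device to flush its volatile write cache, so fsync() on a raw block device actually reaches stable storage. A device without flush support makes blkdev_issue_flush() return -EOPNOTSUPP, which is deliberately treated as success rather than an fsync error.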
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 8b10b87..3af761c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1028,9 +1028,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
        if (dio->bio)
                dio_bio_submit(dio);
 
-       /* All IO is now issued, send it on its way */
-       blk_run_address_space(inode->i_mapping);
-
        /*
         * It is possible that, we return short IO due to end of file.
         * In that case, we need to release all the pages we got hold on.
@@ -1057,8 +1054,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
            ((rw & READ) || (dio->result == dio->size)))
                ret = -EIOCBQUEUED;
 
-       if (ret != -EIOCBQUEUED)
+       if (ret != -EIOCBQUEUED) {
+               /* All IO is now issued, send it on its way */
+               blk_run_address_space(inode->i_mapping);
                dio_await_completion(dio);
+       }
 
        /*
         * Sync will always be dropping the final ref and completing the
@@ -1124,7 +1124,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        int acquire_i_mutex = 0;
 
        if (rw & WRITE)
-               rw = WRITE_ODIRECT;
+               rw = WRITE_SYNC_PLUG;
 
        if (bdev)
                bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
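Two coupled changes: the unconditional unplug right after bio submission is gone, replaced by an unplug on the synchronous path only, immediately before dio_await_completion() blocks. For async requests that return -EIOCBQUEUED, the unplug is instead deferred to the io_submit() batching added in fs/aio.c above, which is also why O_DIRECT writes no longer need the unplug-on-submit WRITE_ODIRECT and switch to WRITE_SYNC_PLUG.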
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index b449e73..fcbc26a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -331,4 +331,17 @@ static inline int bdi_sched_wait(void *word)
        return 0;
 }
 
+static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
+                                      struct page *page)
+{
+       if (bdi && bdi->unplug_io_fn)
+               bdi->unplug_io_fn(bdi, page);
+}
+
+static inline void blk_run_address_space(struct address_space *mapping)
+{
+       if (mapping)
+               blk_run_backing_dev(mapping->backing_dev_info, NULL);
+}
+
 #endif         /* _LINUX_BACKING_DEV_H */
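blk_run_backing_dev() and blk_run_address_space() arrive here unchanged from blkdev.h (removed below), presumably so that mapping unplugs are available to any backing-dev.h user without pulling in the whole of blkdev.h.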
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5be93f1..474792b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -450,11 +450,8 @@ extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly;
 /*
  * remember never ever reenable interrupts between a bvec_kmap_irq and
  * bvec_kunmap_irq!
- *
- * This function MUST be inlined - it plays with the CPU interrupt flags.
  */
-static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec,
-               unsigned long *flags)
+static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
 {
        unsigned long addr;
 
@@ -470,8 +467,7 @@ static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec,
        return (char *) addr + bvec->bv_offset;
 }
 
-static __always_inline void bvec_kunmap_irq(char *buffer,
-               unsigned long *flags)
+static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
 {
        unsigned long ptr = (unsigned long) buffer & PAGE_MASK;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 221cecd..39c601f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -823,19 +823,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
        return bdev->bd_disk->queue;
 }
 
-static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
-                                      struct page *page)
-{
-       if (bdi && bdi->unplug_io_fn)
-               bdi->unplug_io_fn(bdi, page);
-}
-
-static inline void blk_run_address_space(struct address_space *mapping)
-{
-       if (mapping)
-               blk_run_backing_dev(mapping->backing_dev_info, NULL);
-}
-
 /*
  * blk_rq_pos()                        : the current sector
  * blk_rq_bytes()              : bytes left in the entire request
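This is the removal half of the helper move; both functions now live in backing-dev.h, above.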
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2620a8c..2f5fca4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -129,7 +129,6 @@ struct inodes_stat_t {
  * WRITE_SYNC          Like WRITE_SYNC_PLUG, but also unplugs the device
  *                     immediately after submission. The write equivalent
  *                     of READ_SYNC.
- * WRITE_ODIRECT       Special case write for O_DIRECT only.
  * SWRITE_SYNC
  * SWRITE_SYNC_PLUG    Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
  *                     See SWRITE.
@@ -151,7 +150,6 @@ struct inodes_stat_t {
 #define READ_META      (READ | (1 << BIO_RW_META))
 #define WRITE_SYNC_PLUG        (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define WRITE_SYNC     (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-#define WRITE_ODIRECT  (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
 #define SWRITE_SYNC_PLUG       \
                        (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define SWRITE_SYNC    (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
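With fs/direct-io.c switched to WRITE_SYNC_PLUG, WRITE_ODIRECT has no remaining users, so both the macro and its documentation line go away.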