Merge branch 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block
Linus Torvalds [Tue, 15 Sep 2009 00:55:15 +0000 (17:55 -0700)]
* 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block: (29 commits)
  block: use blkdev_issue_discard in blk_ioctl_discard
  Make DISCARD_BARRIER and DISCARD_NOBARRIER writes instead of reads
  block: don't assume device has a request list backing in nr_requests store
  block: Optimal I/O limit wrapper
  cfq: choose a new next_req when a request is dispatched
  Separate read and write statistics of in_flight requests
  aoe: end barrier bios with EOPNOTSUPP
  block: trace bio queueing trial only when it occurs
  block: enable rq CPU completion affinity by default
  cfq: fix the log message after dispatching a request
  block: use printk_once
  cciss: memory leak in cciss_init_one()
  splice: update mtime and atime on files
  block: make blk_iopoll_prep_sched() follow normal 0/1 return convention
  cfq-iosched: get rid of must_alloc flag
  block: use interrupts disabled version of raise_softirq_irqoff()
  block: fix comment in blk-iopoll.c
  block: adjust default budget for blk-iopoll
  block: fix long lines in block/blk-iopoll.c
  block: add blk-iopoll, a NAPI like approach for block devices
  ...

42 files changed:
block/Makefile
block/blk-barrier.c
block/blk-core.c
block/blk-iopoll.c [new file with mode: 0644]
block/blk-merge.c
block/blk-settings.c
block/blk-sysfs.c
block/blk.h
block/cfq-iosched.c
block/elevator.c
block/genhd.c
block/ioctl.c
drivers/block/aoe/aoeblk.c
drivers/block/cciss.c
drivers/block/loop.c
drivers/block/paride/pcd.c
drivers/block/sx8.c
drivers/block/viodasd.c
drivers/md/dm-raid1.c
drivers/md/dm-stripe.c
drivers/md/dm.c
drivers/md/linear.c
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/scsi/scsi_lib.c
drivers/staging/dst/dcore.c
fs/btrfs/extent-tree.c
fs/btrfs/volumes.c
fs/gfs2/rgrp.c
fs/partitions/check.c
fs/splice.c
include/linux/bio.h
include/linux/blk-iopoll.h [new file with mode: 0644]
include/linux/blkdev.h
include/linux/fs.h
include/linux/genhd.h
include/linux/interrupt.h
kernel/sysctl.c
mm/swapfile.c

index 6c54ed0..ba74ca6 100644 (file)
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
                        blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
                        blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-                       ioctl.o genhd.o scsi_ioctl.o
+                       blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)      += bsg.o
 obj-$(CONFIG_IOSCHED_NOOP)     += noop-iosched.o
index 30022b4..6593ab3 100644 (file)
@@ -348,6 +348,9 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }
 
+       if (bio->bi_private)
+               complete(bio->bi_private);
+
        bio_put(bio);
 }
 
@@ -357,21 +360,20 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
  * @sector:    start sector
  * @nr_sects:  number of sectors to discard
  * @gfp_mask:  memory allocation flags (for bio_alloc)
+ * @flags:     DISCARD_FL_* flags to control behaviour
  *
  * Description:
- *    Issue a discard request for the sectors in question. Does not wait.
+ *    Issue a discard request for the sectors in question.
  */
-int blkdev_issue_discard(struct block_device *bdev,
-                        sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+               sector_t nr_sects, gfp_t gfp_mask, int flags)
 {
-       struct request_queue *q;
-       struct bio *bio;
+       DECLARE_COMPLETION_ONSTACK(wait);
+       struct request_queue *q = bdev_get_queue(bdev);
+       int type = flags & DISCARD_FL_BARRIER ?
+               DISCARD_BARRIER : DISCARD_NOBARRIER;
        int ret = 0;
 
-       if (bdev->bd_disk == NULL)
-               return -ENXIO;
-
-       q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;
 
@@ -379,12 +381,14 @@ int blkdev_issue_discard(struct block_device *bdev,
                return -EOPNOTSUPP;
 
        while (nr_sects && !ret) {
-               bio = bio_alloc(gfp_mask, 0);
+               struct bio *bio = bio_alloc(gfp_mask, 0);
                if (!bio)
                        return -ENOMEM;
 
                bio->bi_end_io = blkdev_discard_end_io;
                bio->bi_bdev = bdev;
+               if (flags & DISCARD_FL_WAIT)
+                       bio->bi_private = &wait;
 
                bio->bi_sector = sector;
 
@@ -396,10 +400,13 @@ int blkdev_issue_discard(struct block_device *bdev,
                        bio->bi_size = nr_sects << 9;
                        nr_sects = 0;
                }
+
                bio_get(bio);
-               submit_bio(DISCARD_BARRIER, bio);
+               submit_bio(type, bio);
+
+               if (flags & DISCARD_FL_WAIT)
+                       wait_for_completion(&wait);
 
-               /* Check if it failed immediately */
                if (bio_flagged(bio, BIO_EOPNOTSUPP))
                        ret = -EOPNOTSUPP;
                else if (!bio_flagged(bio, BIO_UPTODATE))
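
With this change a caller selects barrier and wait semantics through the new
flags argument instead of open-coding its own end_io handler. A minimal caller
sketch (hypothetical caller, assuming only the DISCARD_FL_* flags introduced
above):

	/* discard nr_sects sectors starting at 'sector', issued as a barrier */
	err = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
				   DISCARD_FL_BARRIER);

	/* same range, but synchronous: wait for the discard to complete */
	err = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
				   DISCARD_FL_WAIT);
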
index e695634..8135228 100644 (file)
@@ -69,7 +69,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
                part_stat_inc(cpu, part, merges[rw]);
        else {
                part_round_stats(cpu, part);
-               part_inc_in_flight(part);
+               part_inc_in_flight(part, rw);
        }
 
        part_stat_unlock();
@@ -1031,7 +1031,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
 
        if (part->in_flight) {
                __part_stat_add(cpu, part, time_in_queue,
-                               part->in_flight * (now - part->stamp));
+                               part_in_flight(part) * (now - part->stamp));
                __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
        }
        part->stamp = now;
@@ -1112,31 +1112,27 @@ void init_request_from_bio(struct request *req, struct bio *bio)
        req->cmd_type = REQ_TYPE_FS;
 
        /*
-        * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
+        * Inherit FAILFAST from bio (for read-ahead, and explicit
+        * FAILFAST).  FAILFAST flags are identical for req and bio.
         */
-       if (bio_rw_ahead(bio))
-               req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
-                                  REQ_FAILFAST_DRIVER);
-       if (bio_failfast_dev(bio))
-               req->cmd_flags |= REQ_FAILFAST_DEV;
-       if (bio_failfast_transport(bio))
-               req->cmd_flags |= REQ_FAILFAST_TRANSPORT;
-       if (bio_failfast_driver(bio))
-               req->cmd_flags |= REQ_FAILFAST_DRIVER;
-
-       if (unlikely(bio_discard(bio))) {
+       if (bio_rw_flagged(bio, BIO_RW_AHEAD))
+               req->cmd_flags |= REQ_FAILFAST_MASK;
+       else
+               req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
+
+       if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
                req->cmd_flags |= REQ_DISCARD;
-               if (bio_barrier(bio))
+               if (bio_rw_flagged(bio, BIO_RW_BARRIER))
                        req->cmd_flags |= REQ_SOFTBARRIER;
                req->q->prepare_discard_fn(req->q, req);
-       } else if (unlikely(bio_barrier(bio)))
+       } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
                req->cmd_flags |= REQ_HARDBARRIER;
 
-       if (bio_sync(bio))
+       if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
                req->cmd_flags |= REQ_RW_SYNC;
-       if (bio_rw_meta(bio))
+       if (bio_rw_flagged(bio, BIO_RW_META))
                req->cmd_flags |= REQ_RW_META;
-       if (bio_noidle(bio))
+       if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
                req->cmd_flags |= REQ_NOIDLE;
 
        req->errors = 0;
@@ -1151,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
  */
 static inline bool queue_should_plug(struct request_queue *q)
 {
-       return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
+       return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
 }
 
 static int __make_request(struct request_queue *q, struct bio *bio)
@@ -1160,11 +1156,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
        int el_ret;
        unsigned int bytes = bio->bi_size;
        const unsigned short prio = bio_prio(bio);
-       const int sync = bio_sync(bio);
-       const int unplug = bio_unplug(bio);
+       const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
+       const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
+       const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
        int rw_flags;
 
-       if (bio_barrier(bio) && bio_has_data(bio) &&
+       if (bio_rw_flagged(bio, BIO_RW_BARRIER) && bio_has_data(bio) &&
            (q->next_ordered == QUEUE_ORDERED_NONE)) {
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
@@ -1178,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
        spin_lock_irq(q->queue_lock);
 
-       if (unlikely(bio_barrier(bio)) || elv_queue_empty(q))
+       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
                goto get_rq;
 
        el_ret = elv_merge(q, &req, bio);
@@ -1191,6 +1188,9 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
                trace_block_bio_backmerge(q, bio);
 
+               if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+                       blk_rq_set_mixed_merge(req);
+
                req->biotail->bi_next = bio;
                req->biotail = bio;
                req->__data_len += bytes;
@@ -1210,6 +1210,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
                trace_block_bio_frontmerge(q, bio);
 
+               if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
+                       blk_rq_set_mixed_merge(req);
+                       req->cmd_flags &= ~REQ_FAILFAST_MASK;
+                       req->cmd_flags |= ff;
+               }
+
                bio->bi_next = req->bio;
                req->bio = bio;
 
@@ -1457,19 +1463,20 @@ static inline void __generic_make_request(struct bio *bio)
                if (old_sector != -1)
                        trace_block_remap(q, bio, old_dev, old_sector);
 
-               trace_block_bio_queue(q, bio);
-
                old_sector = bio->bi_sector;
                old_dev = bio->bi_bdev->bd_dev;
 
                if (bio_check_eod(bio, nr_sectors))
                        goto end_io;
 
-               if (bio_discard(bio) && !q->prepare_discard_fn) {
+               if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+                   !q->prepare_discard_fn) {
                        err = -EOPNOTSUPP;
                        goto end_io;
                }
 
+               trace_block_bio_queue(q, bio);
+
                ret = q->make_request_fn(q, bio);
        } while (ret);
 
@@ -1654,6 +1661,50 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
+/**
+ * blk_rq_err_bytes - determine number of bytes till the next failure boundary
+ * @rq: request to examine
+ *
+ * Description:
+ *     A request could be a merge of IOs which require different
+ *     failure handling.  This function determines the number of bytes
+ *     which can be failed from the beginning of the request without
+ *     crossing into an area which needs to be retried further.
+ *
+ * Return:
+ *     The number of bytes to fail.
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+unsigned int blk_rq_err_bytes(const struct request *rq)
+{
+       unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
+       unsigned int bytes = 0;
+       struct bio *bio;
+
+       if (!(rq->cmd_flags & REQ_MIXED_MERGE))
+               return blk_rq_bytes(rq);
+
+       /*
+        * Currently the only 'mixing' which can happen is between
+        * different failfast types.  We can safely fail portions
+        * which have all the failfast bits that the first one has -
+        * the ones which are at least as eager to fail as the first
+        * one.
+        */
+       for (bio = rq->bio; bio; bio = bio->bi_next) {
+               if ((bio->bi_rw & ff) != ff)
+                       break;
+               bytes += bio->bi_size;
+       }
+
+       /* this could lead to an infinite loop */
+       BUG_ON(blk_rq_bytes(rq) && !bytes);
+       return bytes;
+}
+EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
+
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
        if (blk_do_io_stat(req)) {
@@ -1687,7 +1738,7 @@ static void blk_account_io_done(struct request *req)
                part_stat_inc(cpu, part, ios[rw]);
                part_stat_add(cpu, part, ticks[rw], duration);
                part_round_stats(cpu, part);
-               part_dec_in_flight(part);
+               part_dec_in_flight(part, rw);
 
                part_stat_unlock();
        }
@@ -1807,8 +1858,15 @@ void blk_dequeue_request(struct request *rq)
         * and to it is freed is accounted as io that is in progress at
         * the driver side.
         */
-       if (blk_account_rq(rq))
+       if (blk_account_rq(rq)) {
                q->in_flight[rq_is_sync(rq)]++;
+               /*
+                * Mark this device as supporting hardware queuing, if
+                * we have more than 4 IOs in flight.
+                */
+               if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
+                       set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
+       }
 }
 
 /**
@@ -2000,6 +2058,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
        if (blk_fs_request(req) || blk_discard_rq(req))
                req->__sector += total_bytes >> 9;
 
+       /* mixed attributes always follow the first bio */
+       if (req->cmd_flags & REQ_MIXED_MERGE) {
+               req->cmd_flags &= ~REQ_FAILFAST_MASK;
+               req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
+       }
+
        /*
         * If total number of sectors is less than the first segment
         * size, something has gone terribly wrong.
@@ -2179,6 +2243,25 @@ bool blk_end_request_cur(struct request *rq, int error)
 EXPORT_SYMBOL(blk_end_request_cur);
 
 /**
+ * blk_end_request_err - Finish a request till the next failure boundary.
+ * @rq: the request to finish till the next failure boundary for
+ * @error: must be negative errno
+ *
+ * Description:
+ *     Complete @rq till the next failure boundary.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool blk_end_request_err(struct request *rq, int error)
+{
+       WARN_ON(error >= 0);
+       return blk_end_request(rq, error, blk_rq_err_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(blk_end_request_err);
+
+/**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
  * @error:    %0 for success, < %0 for error
@@ -2237,12 +2320,31 @@ bool __blk_end_request_cur(struct request *rq, int error)
 }
 EXPORT_SYMBOL(__blk_end_request_cur);
 
+/**
+ * __blk_end_request_err - Finish a request till the next failure boundary.
+ * @rq: the request to finish till the next failure boundary for
+ * @error: must be negative errno
+ *
+ * Description:
+ *     Complete @rq till the next failure boundary.  Must be called
+ *     with queue lock held.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool __blk_end_request_err(struct request *rq, int error)
+{
+       WARN_ON(error >= 0);
+       return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(__blk_end_request_err);
+
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                     struct bio *bio)
 {
-       /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
-          we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
-       rq->cmd_flags |= (bio->bi_rw & 3);
+       /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
+       rq->cmd_flags |= bio->bi_rw & REQ_RW;
 
        if (bio_has_data(bio)) {
                rq->nr_phys_segments = bio_phys_segments(q, bio);
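
Taken together, blk_rq_err_bytes() and blk_end_request_err() let a driver fail
only the leading portion of a mixed-merged request that is at least as eager to
fail as its first bio, leaving the rest to be retried. A hedged sketch of an
error path (hypothetical driver code; the in-tree user added by this merge is
scsi_io_completion(), further below):

	/* on a fatal error, fail bytes up to the next failure boundary */
	if (blk_end_request_err(rq, -EIO))
		my_requeue_request(q, rq);	/* hypothetical requeue helper */
	else
		my_start_next_request(q);	/* hypothetical */
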
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
new file mode 100644 (file)
index 0000000..ca56420
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+ * Functions related to interrupt-poll handling in the block layer. This
+ * is similar to NAPI for network devices.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/blk-iopoll.h>
+#include <linux/delay.h>
+
+#include "blk.h"
+
+int blk_iopoll_enabled = 1;
+EXPORT_SYMBOL(blk_iopoll_enabled);
+
+static unsigned int blk_iopoll_budget __read_mostly = 256;
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
+
+/**
+ * blk_iopoll_sched - Schedule a run of the iopoll handler
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Add this blk_iopoll structure to the pending poll list and trigger the
+ *     raise of the blk iopoll softirq. The driver must already have gotten a
+ *     successful return from blk_iopoll_sched_prep() before calling this.
+ **/
+void blk_iopoll_sched(struct blk_iopoll *iop)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
+       __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(blk_iopoll_sched);
+
+/**
+ * __blk_iopoll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     See blk_iopoll_complete(). This function must be called with interrupts
+ *     disabled.
+ **/
+void __blk_iopoll_complete(struct blk_iopoll *iop)
+{
+       list_del(&iop->list);
+       smp_mb__before_clear_bit();
+       clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(__blk_iopoll_complete);
+
+/**
+ * blk_iopoll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     If a driver consumes less than the assigned budget in its run of the
+ *     iopoll handler, it'll end the polled mode by calling this function. The
+ *     iopoll handler will not be invoked again before blk_iopoll_sched_prep()
+ *     is called.
+ **/
+void blk_iopoll_complete(struct blk_iopoll *iopoll)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __blk_iopoll_complete(iopoll);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(blk_iopoll_complete);
+
+static void blk_iopoll_softirq(struct softirq_action *h)
+{
+       struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
+       int rearm = 0, budget = blk_iopoll_budget;
+       unsigned long start_time = jiffies;
+
+       local_irq_disable();
+
+       while (!list_empty(list)) {
+               struct blk_iopoll *iop;
+               int work, weight;
+
+               /*
+                * If softirq window is exhausted then punt.
+                */
+               if (budget <= 0 || time_after(jiffies, start_time)) {
+                       rearm = 1;
+                       break;
+               }
+
+               local_irq_enable();
+
+               /* Even though interrupts have been re-enabled, this
+                * access is safe because interrupts can only add new
+                * entries to the tail of this list, and only ->poll()
+                * calls can remove this head entry from the list.
+                */
+               iop = list_entry(list->next, struct blk_iopoll, list);
+
+               weight = iop->weight;
+               work = 0;
+               if (test_bit(IOPOLL_F_SCHED, &iop->state))
+                       work = iop->poll(iop, weight);
+
+               budget -= work;
+
+               local_irq_disable();
+
+               /*
+                * Drivers must not modify the iopoll state if they
+                * consume their assigned weight (or more; some drivers can't
+                * easily just stop processing, they have to complete an
+                * entire mask of commands).  In such cases this code
+                * still "owns" the iopoll instance and therefore can
+                * move the instance around on the list at will.
+                */
+               if (work >= weight) {
+                       if (blk_iopoll_disable_pending(iop))
+                               __blk_iopoll_complete(iop);
+                       else
+                               list_move_tail(&iop->list, list);
+               }
+       }
+
+       if (rearm)
+               __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+
+       local_irq_enable();
+}
+
+/**
+ * blk_iopoll_disable - Disable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Disable io polling and wait for any pending callbacks to have completed.
+ **/
+void blk_iopoll_disable(struct blk_iopoll *iop)
+{
+       set_bit(IOPOLL_F_DISABLE, &iop->state);
+       while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
+               msleep(1);
+       clear_bit(IOPOLL_F_DISABLE, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_disable);
+
+/**
+ * blk_iopoll_enable - Enable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Enable iopoll on this @iop. Note that this does not schedule a
+ *     handler run; it only marks the instance as active.
+ **/
+void blk_iopoll_enable(struct blk_iopoll *iop)
+{
+       BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
+       smp_mb__before_clear_bit();
+       clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_enable);
+
+/**
+ * blk_iopoll_init - Initialize this @iop
+ * @iop:      The parent iopoll structure
+ * @weight:   The default weight (or command completion budget)
+ * @poll_fn:  The handler to invoke
+ *
+ * Description:
+ *     Initialize this blk_iopoll structure. Before being actively used, the
+ *     driver must call blk_iopoll_enable().
+ **/
+void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
+{
+       memset(iop, 0, sizeof(*iop));
+       INIT_LIST_HEAD(&iop->list);
+       iop->weight = weight;
+       iop->poll = poll_fn;
+       set_bit(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_init);
+
+static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
+                                         unsigned long action, void *hcpu)
+{
+       /*
+        * If a CPU goes away, splice its entries to the current CPU
+        * and trigger a run of the softirq
+        */
+       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+               int cpu = (unsigned long) hcpu;
+
+               local_irq_disable();
+               list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+                                &__get_cpu_var(blk_cpu_iopoll));
+               __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+               local_irq_enable();
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
+       .notifier_call  = blk_iopoll_cpu_notify,
+};
+
+static __init int blk_iopoll_setup(void)
+{
+       int i;
+
+       for_each_possible_cpu(i)
+               INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
+
+       open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
+       register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
+       return 0;
+}
+subsys_initcall(blk_iopoll_setup);
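
A driver converting to the new interface sets up one blk_iopoll instance per
hardware queue, arms it from its interrupt handler, and completes commands from
the poll callback. A minimal sketch (hypothetical driver; assumes the 0/1
return convention for blk_iopoll_sched_prep() established in this series, where
1 means the instance may be scheduled):

	static int my_iopoll(struct blk_iopoll *iop, int budget)
	{
		/* my_complete_cmds() is a hypothetical driver helper */
		int done = my_complete_cmds(iop, budget);

		/* under budget: leave polled mode, re-enable completions */
		if (done < budget)
			blk_iopoll_complete(iop);
		return done;
	}

	static irqreturn_t my_irq(int irq, void *data)
	{
		struct my_hw_queue *hwq = data;		/* hypothetical */

		if (blk_iopoll_sched_prep(&hwq->iopoll))
			blk_iopoll_sched(&hwq->iopoll);
		return IRQ_HANDLED;
	}

	/* setup: budget of 32 completions per poll invocation */
	blk_iopoll_init(&hwq->iopoll, 32, my_iopoll);
	blk_iopoll_enable(&hwq->iopoll);
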
index e199967..99cb5cf 100644 (file)
@@ -311,6 +311,36 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
        return 1;
 }
 
+/**
+ * blk_rq_set_mixed_merge - mark a request as mixed merge
+ * @rq: request to mark as mixed merge
+ *
+ * Description:
+ *     @rq is about to be mixed merged.  Make sure the attributes
+ *     which can be mixed are set in each bio and mark @rq as mixed
+ *     merged.
+ */
+void blk_rq_set_mixed_merge(struct request *rq)
+{
+       unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
+       struct bio *bio;
+
+       if (rq->cmd_flags & REQ_MIXED_MERGE)
+               return;
+
+       /*
+        * @rq will no longer represent mixable attributes for all the
+        * contained bios.  It will just track those of the first one.
+        * Distribute the attributes to each bio.
+        */
+       for (bio = rq->bio; bio; bio = bio->bi_next) {
+               WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
+                            (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
+               bio->bi_rw |= ff;
+       }
+       rq->cmd_flags |= REQ_MIXED_MERGE;
+}
+
 static void blk_account_io_merge(struct request *req)
 {
        if (blk_do_io_stat(req)) {
@@ -321,7 +351,7 @@ static void blk_account_io_merge(struct request *req)
                part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
                part_round_stats(cpu, part);
-               part_dec_in_flight(part);
+               part_dec_in_flight(part, rq_data_dir(req));
 
                part_stat_unlock();
        }
@@ -350,12 +380,6 @@ static int attempt_merge(struct request_queue *q, struct request *req,
        if (blk_integrity_rq(req) != blk_integrity_rq(next))
                return 0;
 
-       /* don't merge requests of different failfast settings */
-       if (blk_failfast_dev(req)       != blk_failfast_dev(next)       ||
-           blk_failfast_transport(req) != blk_failfast_transport(next) ||
-           blk_failfast_driver(req)    != blk_failfast_driver(next))
-               return 0;
-
        /*
         * If we are allowed to merge, then append bio list
         * from next to rq and release next. merge_requests_fn
@@ -366,6 +390,19 @@ static int attempt_merge(struct request_queue *q, struct request *req,
                return 0;
 
        /*
+        * If failfast settings disagree or either of the two is already
+        * a mixed merge, mark both as mixed before proceeding.  This
+        * makes sure that all involved bios have mixable attributes
+        * set properly.
+        */
+       if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
+           (req->cmd_flags & REQ_FAILFAST_MASK) !=
+           (next->cmd_flags & REQ_FAILFAST_MASK)) {
+               blk_rq_set_mixed_merge(req);
+               blk_rq_set_mixed_merge(next);
+       }
+
+       /*
         * At this point we have either done a back merge
         * or front merge. We need the smaller start_time of
         * the merged requests to be the current request
index 476d870..83413ff 100644 (file)
@@ -428,6 +428,25 @@ void blk_queue_io_min(struct request_queue *q, unsigned int min)
 EXPORT_SYMBOL(blk_queue_io_min);
 
 /**
+ * blk_limits_io_opt - set optimal request size for a device
+ * @limits: the queue limits
+ * @opt:  optimal I/O size in bytes
+ *
+ * Description:
+ *   Storage devices may report an optimal I/O size, which is the
+ *   device's preferred unit for sustained I/O.  This is rarely reported
+ *   for disk drives.  For RAID arrays it is usually the stripe width or
+ *   the internal track size.  A properly aligned multiple of
+ *   optimal_io_size is the preferred request size for workloads where
+ *   sustained throughput is desired.
+ */
+void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt)
+{
+       limits->io_opt = opt;
+}
+EXPORT_SYMBOL(blk_limits_io_opt);
+
+/**
  * blk_queue_io_opt - set optimal request size for the queue
  * @q: the request queue for the device
  * @opt:  optimal request size in bytes
@@ -442,7 +461,7 @@ EXPORT_SYMBOL(blk_queue_io_min);
  */
 void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 {
-       q->limits.io_opt = opt;
+       blk_limits_io_opt(&q->limits, opt);
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
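
Stacking drivers that previously assigned q->limits.io_opt directly can now go
through the limits wrapper, as the dm-stripe hunk further below does. For a
hypothetical striped target:

	/* advertise full-stripe-width I/O as the optimal size */
	blk_limits_io_opt(limits, chunk_size * nr_stripes);
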
 
index d3aa2aa..b78c9c3 100644 (file)
@@ -40,7 +40,12 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 {
        struct request_list *rl = &q->rq;
        unsigned long nr;
-       int ret = queue_var_store(&nr, page, count);
+       int ret;
+
+       if (!q->request_fn)
+               return -EINVAL;
+
+       ret = queue_var_store(&nr, page, count);
        if (nr < BLKDEV_MIN_RQ)
                nr = BLKDEV_MIN_RQ;
 
index 3fae6ad..5ee3d7e 100644 (file)
@@ -104,6 +104,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 int attempt_back_merge(struct request_queue *q, struct request *rq);
 int attempt_front_merge(struct request_queue *q, struct request *rq);
 void blk_recalc_rq_segments(struct request *rq);
+void blk_rq_set_mixed_merge(struct request *rq);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
index fd7080e..0e3814b 100644 (file)
@@ -134,13 +134,8 @@ struct cfq_data {
        struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
        unsigned int busy_queues;
-       /*
-        * Used to track any pending rt requests so we can pre-empt current
-        * non-RT cfqq in service when this value is non-zero.
-        */
-       unsigned int busy_rt_queues;
 
-       int rq_in_driver;
+       int rq_in_driver[2];
        int sync_flight;
 
        /*
@@ -191,7 +186,6 @@ enum cfqq_state_flags {
        CFQ_CFQQ_FLAG_on_rr = 0,        /* on round-robin busy list */
        CFQ_CFQQ_FLAG_wait_request,     /* waiting for a request */
        CFQ_CFQQ_FLAG_must_dispatch,    /* must be allowed a dispatch */
-       CFQ_CFQQ_FLAG_must_alloc,       /* must be allowed rq alloc */
        CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
        CFQ_CFQQ_FLAG_fifo_expire,      /* FIFO checked in this slice */
        CFQ_CFQQ_FLAG_idle_window,      /* slice idling enabled */
@@ -218,7 +212,6 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)             \
 CFQ_CFQQ_FNS(on_rr);
 CFQ_CFQQ_FNS(wait_request);
 CFQ_CFQQ_FNS(must_dispatch);
-CFQ_CFQQ_FNS(must_alloc);
 CFQ_CFQQ_FNS(must_alloc_slice);
 CFQ_CFQQ_FNS(fifo_expire);
 CFQ_CFQQ_FNS(idle_window);
@@ -239,6 +232,11 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
                                                struct io_context *);
 
+static inline int rq_in_driver(struct cfq_data *cfqd)
+{
+       return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1];
+}
+
 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
                                            int is_sync)
 {
@@ -257,7 +255,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
  */
 static inline int cfq_bio_sync(struct bio *bio)
 {
-       if (bio_data_dir(bio) == READ || bio_sync(bio))
+       if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO))
                return 1;
 
        return 0;
@@ -648,8 +646,6 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
        BUG_ON(cfq_cfqq_on_rr(cfqq));
        cfq_mark_cfqq_on_rr(cfqq);
        cfqd->busy_queues++;
-       if (cfq_class_rt(cfqq))
-               cfqd->busy_rt_queues++;
 
        cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -673,8 +669,6 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
        BUG_ON(!cfqd->busy_queues);
        cfqd->busy_queues--;
-       if (cfq_class_rt(cfqq))
-               cfqd->busy_rt_queues--;
 }
 
 /*
@@ -760,9 +754,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
 
-       cfqd->rq_in_driver++;
+       cfqd->rq_in_driver[rq_is_sync(rq)]++;
        cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
-                                               cfqd->rq_in_driver);
+                                               rq_in_driver(cfqd));
 
        cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
 }
@@ -770,11 +764,12 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
+       const int sync = rq_is_sync(rq);
 
-       WARN_ON(!cfqd->rq_in_driver);
-       cfqd->rq_in_driver--;
+       WARN_ON(!cfqd->rq_in_driver[sync]);
+       cfqd->rq_in_driver[sync]--;
        cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
-                                               cfqd->rq_in_driver);
+                                               rq_in_driver(cfqd));
 }
 
 static void cfq_remove_request(struct request *rq)
@@ -1080,7 +1075,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
        /*
         * still requests with the driver, don't idle
         */
-       if (cfqd->rq_in_driver)
+       if (rq_in_driver(cfqd))
                return;
 
        /*
@@ -1115,6 +1110,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
 
        cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
 
+       cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
        cfq_remove_request(rq);
        cfqq->dispatched++;
        elv_dispatch_sort(q, rq);
@@ -1179,20 +1175,6 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
                goto expire;
 
        /*
-        * If we have a RT cfqq waiting, then we pre-empt the current non-rt
-        * cfqq.
-        */
-       if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) {
-               /*
-                * We simulate this as cfqq timed out so that it gets to bank
-                * the remaining of its time slice.
-                */
-               cfq_log_cfqq(cfqd, cfqq, "preempt");
-               cfq_slice_expired(cfqd, 1);
-               goto new_queue;
-       }
-
-       /*
         * The active queue has requests and isn't expired, allow it to
         * dispatch.
         */
@@ -1312,6 +1294,12 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
                return 0;
 
        /*
+        * Drain async requests before we start sync IO
+        */
+       if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+               return 0;
+
+       /*
         * If this is an async queue and we have sync IO in flight, let it wait
         */
        if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
@@ -1362,7 +1350,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
                cfq_slice_expired(cfqd, 0);
        }
 
-       cfq_log(cfqd, "dispatched a request");
+       cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
        return 1;
 }
 
@@ -2130,11 +2118,11 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
  */
 static void cfq_update_hw_tag(struct cfq_data *cfqd)
 {
-       if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
-               cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
+       if (rq_in_driver(cfqd) > cfqd->rq_in_driver_peak)
+               cfqd->rq_in_driver_peak = rq_in_driver(cfqd);
 
        if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
-           cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
+           rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
                return;
 
        if (cfqd->hw_tag_samples++ < 50)
@@ -2161,9 +2149,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 
        cfq_update_hw_tag(cfqd);
 
-       WARN_ON(!cfqd->rq_in_driver);
+       WARN_ON(!cfqd->rq_in_driver[sync]);
        WARN_ON(!cfqq->dispatched);
-       cfqd->rq_in_driver--;
+       cfqd->rq_in_driver[sync]--;
        cfqq->dispatched--;
 
        if (cfq_cfqq_sync(cfqq))
@@ -2197,7 +2185,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                        cfq_arm_slice_timer(cfqd);
        }
 
-       if (!cfqd->rq_in_driver)
+       if (!rq_in_driver(cfqd))
                cfq_schedule_dispatch(cfqd);
 }
 
@@ -2229,8 +2217,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
-       if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
-           !cfq_cfqq_must_alloc_slice(cfqq)) {
+       if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
                cfq_mark_cfqq_must_alloc_slice(cfqq);
                return ELV_MQUEUE_MUST;
        }
@@ -2317,7 +2304,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
        }
 
        cfqq->allocated[rw]++;
-       cfq_clear_cfqq_must_alloc(cfqq);
        atomic_inc(&cfqq->ref);
 
        spin_unlock_irqrestore(q->queue_lock, flags);
index 2d511f9..1975b61 100644 (file)
@@ -79,7 +79,8 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
        /*
         * Don't merge file system requests and discard requests
         */
-       if (bio_discard(bio) != bio_discard(rq->bio))
+       if (bio_rw_flagged(bio, BIO_RW_DISCARD) !=
+           bio_rw_flagged(rq->bio, BIO_RW_DISCARD))
                return 0;
 
        /*
@@ -100,19 +101,6 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
        if (bio_integrity(bio) != blk_integrity_rq(rq))
                return 0;
 
-       /*
-        * Don't merge if failfast settings don't match.
-        *
-        * FIXME: The negation in front of each condition is necessary
-        * because bio and request flags use different bit positions
-        * and the accessors return those bits directly.  This
-        * ugliness will soon go away.
-        */
-       if (!bio_failfast_dev(bio)       != !blk_failfast_dev(rq)       ||
-           !bio_failfast_transport(bio) != !blk_failfast_transport(rq) ||
-           !bio_failfast_driver(bio)    != !blk_failfast_driver(rq))
-               return 0;
-
        if (!elv_iosched_allow_merge(rq, bio))
                return 0;
 
index f4c64c2..5b76bf5 100644 (file)
@@ -869,6 +869,7 @@ static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -888,6 +889,7 @@ static struct attribute *disk_attrs[] = {
        &dev_attr_alignment_offset.attr,
        &dev_attr_capability.attr,
        &dev_attr_stat.attr,
+       &dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
        &dev_attr_fail.attr,
 #endif
@@ -1053,7 +1055,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                           part_stat_read(hd, merges[1]),
                           (unsigned long long)part_stat_read(hd, sectors[1]),
                           jiffies_to_msecs(part_stat_read(hd, ticks[1])),
-                          hd->in_flight,
+                          part_in_flight(hd),
                           jiffies_to_msecs(part_stat_read(hd, io_ticks)),
                           jiffies_to_msecs(part_stat_read(hd, time_in_queue))
                        );
@@ -1215,6 +1217,16 @@ void put_disk(struct gendisk *disk)
 
 EXPORT_SYMBOL(put_disk);
 
+static void set_disk_ro_uevent(struct gendisk *gd, int ro)
+{
+       char event[] = "DISK_RO=1";
+       char *envp[] = { event, NULL };
+
+       if (!ro)
+               event[8] = '0';
+       kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
+}
+
 void set_device_ro(struct block_device *bdev, int flag)
 {
        bdev->bd_part->policy = flag;
@@ -1227,8 +1239,12 @@ void set_disk_ro(struct gendisk *disk, int flag)
        struct disk_part_iter piter;
        struct hd_struct *part;
 
-       disk_part_iter_init(&piter, disk,
-                           DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
+       if (disk->part0.policy != flag) {
+               set_disk_ro_uevent(disk, flag);
+               disk->part0.policy = flag;
+       }
+
+       disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
        while ((part = disk_part_iter_next(&piter)))
                part->policy = flag;
        disk_part_iter_exit(&piter);
index 500e4c7..d3e6b58 100644 (file)
@@ -112,22 +112,9 @@ static int blkdev_reread_part(struct block_device *bdev)
        return res;
 }
 
-static void blk_ioc_discard_endio(struct bio *bio, int err)
-{
-       if (err) {
-               if (err == -EOPNOTSUPP)
-                       set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
-       }
-       complete(bio->bi_private);
-}
-
 static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
                             uint64_t len)
 {
-       struct request_queue *q = bdev_get_queue(bdev);
-       int ret = 0;
-
        if (start & 511)
                return -EINVAL;
        if (len & 511)
@@ -137,40 +124,8 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 
        if (start + len > (bdev->bd_inode->i_size >> 9))
                return -EINVAL;
-
-       if (!q->prepare_discard_fn)
-               return -EOPNOTSUPP;
-
-       while (len && !ret) {
-               DECLARE_COMPLETION_ONSTACK(wait);
-               struct bio *bio;
-
-               bio = bio_alloc(GFP_KERNEL, 0);
-
-               bio->bi_end_io = blk_ioc_discard_endio;
-               bio->bi_bdev = bdev;
-               bio->bi_private = &wait;
-               bio->bi_sector = start;
-
-               if (len > queue_max_hw_sectors(q)) {
-                       bio->bi_size = queue_max_hw_sectors(q) << 9;
-                       len -= queue_max_hw_sectors(q);
-                       start += queue_max_hw_sectors(q);
-               } else {
-                       bio->bi_size = len << 9;
-                       len = 0;
-               }
-               submit_bio(DISCARD_NOBARRIER, bio);
-
-               wait_for_completion(&wait);
-
-               if (bio_flagged(bio, BIO_EOPNOTSUPP))
-                       ret = -EOPNOTSUPP;
-               else if (!bio_flagged(bio, BIO_UPTODATE))
-                       ret = -EIO;
-               bio_put(bio);
-       }
-       return ret;
+       return blkdev_issue_discard(bdev, start, len, GFP_KERNEL,
+                                   DISCARD_FL_WAIT);
 }
 
 static int put_ushort(unsigned long arg, unsigned short val)
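
For reference, the code path above is reached from userspace through the
BLKDISCARD ioctl, which takes a byte-granular, 512-byte-aligned {start, length}
pair. A standalone sketch (not part of this merge):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>		/* BLKDISCARD */

	static int discard_range(int fd, uint64_t start, uint64_t len)
	{
		uint64_t range[2] = { start, len };	/* in bytes */

		return ioctl(fd, BLKDISCARD, &range);
	}
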
index 95d3449..b6cd571 100644 (file)
@@ -172,6 +172,9 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
                BUG();
                bio_endio(bio, -ENXIO);
                return 0;
+       } else if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+               bio_endio(bio, -EOPNOTSUPP);
+               return 0;
        } else if (bio->bi_io_vec == NULL) {
                printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
                BUG();
index a52cc7f..0589dfb 100644 (file)
@@ -3889,7 +3889,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
        int j = 0;
        int rc;
        int dac, return_code;
-       InquiryData_struct *inq_buff = NULL;
+       InquiryData_struct *inq_buff;
 
        if (reset_devices) {
                /* Reset the controller with a PCI power-cycle */
@@ -4029,6 +4029,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
                printk(KERN_WARNING "cciss: unable to determine firmware"
                        " version of controller\n");
        }
+       kfree(inq_buff);
 
        cciss_procinit(i);
 
@@ -4045,7 +4046,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
        return 1;
 
 clean4:
-       kfree(inq_buff);
        kfree(hba[i]->cmd_pool_bits);
        if (hba[i]->cmd_pool)
                pci_free_consistent(hba[i]->pdev,
index 5757188..bbb7944 100644 (file)
@@ -475,7 +475,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
        pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
 
        if (bio_rw(bio) == WRITE) {
-               int barrier = bio_barrier(bio);
+               bool barrier = bio_rw_flagged(bio, BIO_RW_BARRIER);
                struct file *file = lo->lo_backing_file;
 
                if (barrier) {
index 911dfd9..9f3518c 100644 (file)
@@ -219,8 +219,6 @@ static int pcd_sector;              /* address of next requested sector */
 static int pcd_count;          /* number of blocks still to do */
 static char *pcd_buf;          /* buffer for request in progress */
 
-static int pcd_warned;         /* Have we logged a phase warning ? */
-
 /* kernel glue structures */
 
 static int pcd_block_open(struct block_device *bdev, fmode_t mode)
@@ -417,12 +415,10 @@ static int pcd_completion(struct pcd_unit *cd, char *buf, char *fun)
                                        printk
                                            ("%s: %s: Unexpected phase %d, d=%d, k=%d\n",
                                             cd->name, fun, p, d, k);
-                               if ((verbose < 2) && !pcd_warned) {
-                                       pcd_warned = 1;
-                                       printk
-                                           ("%s: WARNING: ATAPI phase errors\n",
-                                            cd->name);
-                               }
+                               if (verbose < 2)
+                                       printk_once(
+                                           "%s: WARNING: ATAPI phase errors\n",
+                                           cd->name);
                                mdelay(1);
                        }
                        if (k++ > PCD_TMO) {
index da403b6..f5cd2e8 100644 (file)
@@ -1564,15 +1564,13 @@ static int carm_init_shm(struct carm_host *host)
 
 static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-       static unsigned int printed_version;
        struct carm_host *host;
        unsigned int pci_dac;
        int rc;
        struct request_queue *q;
        unsigned int i;
 
-       if (!printed_version++)
-               printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
+       printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
 
        rc = pci_enable_device(pdev);
        if (rc)
index 390d69b..b441ce3 100644 (file)
@@ -416,15 +416,9 @@ retry:
                goto retry;
        }
        if (we.max_disk > (MAX_DISKNO - 1)) {
-               static int warned;
-
-               if (warned == 0) {
-                       warned++;
-                       printk(VIOD_KERN_INFO
-                               "Only examining the first %d "
-                               "of %d disks connected\n",
-                               MAX_DISKNO, we.max_disk + 1);
-               }
+               printk_once(VIOD_KERN_INFO
+                       "Only examining the first %d of %d disks connected\n",
+                       MAX_DISKNO, we.max_disk + 1);
        }
 
        /* Send the close event to OS/400.  We DON'T expect a response */
index 33f179e..cc9dc79 100644 (file)
@@ -1129,7 +1129,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
        if (error == -EOPNOTSUPP)
                goto out;
 
-       if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
+       if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
                goto out;
 
        if (unlikely(error)) {
index 3e563d2..e0efc1a 100644 (file)
@@ -285,7 +285,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
        if (!error)
                return 0; /* I/O complete */
 
-       if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
+       if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
                return error;
 
        if (error == -EOPNOTSUPP)
@@ -336,7 +336,7 @@ static void stripe_io_hints(struct dm_target *ti,
        unsigned chunk_size = (sc->chunk_mask + 1) << 9;
 
        blk_limits_io_min(limits, chunk_size);
-       limits->io_opt = chunk_size * sc->stripes;
+       blk_limits_io_opt(limits, chunk_size * sc->stripes);
 }
 
 static struct target_type stripe_target = {
index b4845b1..eee28fa 100644 (file)
@@ -130,7 +130,7 @@ struct mapped_device {
        /*
         * A list of ios that arrived while we were suspended.
         */
-       atomic_t pending;
+       atomic_t pending[2];
        wait_queue_head_t wait;
        struct work_struct work;
        struct bio_list deferred;
@@ -453,13 +453,14 @@ static void start_io_acct(struct dm_io *io)
 {
        struct mapped_device *md = io->md;
        int cpu;
+       int rw = bio_data_dir(io->bio);
 
        io->start_time = jiffies;
 
        cpu = part_stat_lock();
        part_round_stats(cpu, &dm_disk(md)->part0);
        part_stat_unlock();
-       dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
+       dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
 }
 
 static void end_io_acct(struct dm_io *io)
@@ -479,8 +480,9 @@ static void end_io_acct(struct dm_io *io)
         * After this is decremented the bio must not be touched if it is
         * a barrier.
         */
-       dm_disk(md)->part0.in_flight = pending =
-               atomic_dec_return(&md->pending);
+       dm_disk(md)->part0.in_flight[rw] = pending =
+               atomic_dec_return(&md->pending[rw]);
+       pending += atomic_read(&md->pending[rw^0x1]);
 
        /* nudge anyone waiting on suspend queue */
        if (!pending)
@@ -586,7 +588,7 @@ static void dec_pending(struct dm_io *io, int error)
                         */
                        spin_lock_irqsave(&md->deferred_lock, flags);
                        if (__noflush_suspending(md)) {
-                               if (!bio_barrier(io->bio))
+                               if (!bio_rw_flagged(io->bio, BIO_RW_BARRIER))
                                        bio_list_add_head(&md->deferred,
                                                          io->bio);
                        } else
@@ -598,7 +600,7 @@ static void dec_pending(struct dm_io *io, int error)
                io_error = io->error;
                bio = io->bio;
 
-               if (bio_barrier(bio)) {
+               if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
                        /*
                         * There can be just one barrier request so we use
                         * a per-device variable for error reporting.
@@ -1209,7 +1211,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
 
        ci.map = dm_get_table(md);
        if (unlikely(!ci.map)) {
-               if (!bio_barrier(bio))
+               if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
                        bio_io_error(bio);
                else
                        if (!md->barrier_error)
@@ -1321,7 +1323,7 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
         * we have to queue this io for later.
         */
        if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
-           unlikely(bio_barrier(bio))) {
+           unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
                up_read(&md->io_lock);
 
                if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1344,7 +1346,7 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
 {
        struct mapped_device *md = q->queuedata;
 
-       if (unlikely(bio_barrier(bio))) {
+       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
        }
@@ -1785,7 +1787,8 @@ static struct mapped_device *alloc_dev(int minor)
        if (!md->disk)
                goto bad_disk;
 
-       atomic_set(&md->pending, 0);
+       atomic_set(&md->pending[0], 0);
+       atomic_set(&md->pending[1], 0);
        init_waitqueue_head(&md->wait);
        INIT_WORK(&md->work, dm_wq_work);
        init_waitqueue_head(&md->eventq);
@@ -2088,7 +2091,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
                                break;
                        }
                        spin_unlock_irqrestore(q->queue_lock, flags);
-               } else if (!atomic_read(&md->pending))
+               } else if (!atomic_read(&md->pending[0]) &&
+                                       !atomic_read(&md->pending[1]))
                        break;
 
                if (interruptible == TASK_INTERRUPTIBLE &&
@@ -2164,7 +2168,7 @@ static void dm_wq_work(struct work_struct *work)
                if (dm_request_based(md))
                        generic_make_request(c);
                else {
-                       if (bio_barrier(c))
+                       if (bio_rw_flagged(c, BIO_RW_BARRIER))
                                process_barrier(md, c);
                        else
                                __split_and_process_bio(md, c);
index 5fe39c2..ea48429 100644 (file)
@@ -288,7 +288,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
        sector_t start_sector;
        int cpu;
 
-       if (unlikely(bio_barrier(bio))) {
+       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
        }
index 7140909..89e7681 100644 (file)
@@ -90,7 +90,7 @@ static void multipath_end_request(struct bio *bio, int error)
 
        if (uptodate)
                multipath_end_bh_io(mp_bh, 0);
-       else if (!bio_rw_ahead(bio)) {
+       else if (!bio_rw_flagged(bio, BIO_RW_AHEAD)) {
                /*
                 * oops, IO error:
                 */
@@ -144,7 +144,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
        const int rw = bio_data_dir(bio);
        int cpu;
 
-       if (unlikely(bio_barrier(bio))) {
+       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
        }
index 898e2bd..f845ed9 100644 (file)
@@ -448,7 +448,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio)
        const int rw = bio_data_dir(bio);
        int cpu;
 
-       if (unlikely(bio_barrier(bio))) {
+       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
        }
index 8726fd7..ff7ed33 100644 (file)
@@ -782,8 +782,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
        struct bio_list bl;
        struct page **behind_pages = NULL;
        const int rw = bio_data_dir(bio);
-       const int do_sync = bio_sync(bio);
-       int cpu, do_barriers;
+       const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
+       int cpu;
+       bool do_barriers;
        mdk_rdev_t *blocked_rdev;
 
        /*
@@ -797,7 +798,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
 
        md_write_start(mddev, bio); /* wait on superblock update early */
 
-       if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
+       if (unlikely(!mddev->barriers_work &&
+                    bio_rw_flagged(bio, BIO_RW_BARRIER))) {
                if (rw == WRITE)
                        md_write_end(mddev);
                bio_endio(bio, -EOPNOTSUPP);
@@ -925,7 +927,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
        atomic_set(&r1_bio->remaining, 0);
        atomic_set(&r1_bio->behind_remaining, 0);
 
-       do_barriers = bio_barrier(bio);
+       do_barriers = bio_rw_flagged(bio, BIO_RW_BARRIER);
        if (do_barriers)
                set_bit(R1BIO_Barrier, &r1_bio->state);
 
@@ -1600,7 +1602,7 @@ static void raid1d(mddev_t *mddev)
                         * We already have a nr_pending reference on these rdevs.
                         */
                        int i;
-                       const int do_sync = bio_sync(r1_bio->master_bio);
+                       const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
                        clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
                        clear_bit(R1BIO_Barrier, &r1_bio->state);
                        for (i=0; i < conf->raid_disks; i++)
@@ -1654,7 +1656,7 @@ static void raid1d(mddev_t *mddev)
                                       (unsigned long long)r1_bio->sector);
                                raid_end_bio_io(r1_bio);
                        } else {
-                               const int do_sync = bio_sync(r1_bio->master_bio);
+                               const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
                                r1_bio->bios[r1_bio->read_disk] =
                                        mddev->ro ? IO_BLOCKED : NULL;
                                r1_bio->read_disk = disk;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3d9020c..d0a2152 100644 (file)
@@ -796,12 +796,12 @@ static int make_request(struct request_queue *q, struct bio * bio)
        int i;
        int chunk_sects = conf->chunk_mask + 1;
        const int rw = bio_data_dir(bio);
-       const int do_sync = bio_sync(bio);
+       const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
        struct bio_list bl;
        unsigned long flags;
        mdk_rdev_t *blocked_rdev;
 
-       if (unlikely(bio_barrier(bio))) {
+       if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
                bio_endio(bio, -EOPNOTSUPP);
                return 0;
        }
@@ -1610,7 +1610,7 @@ static void raid10d(mddev_t *mddev)
                                raid_end_bio_io(r10_bio);
                                bio_put(bio);
                        } else {
-                               const int do_sync = bio_sync(r10_bio->master_bio);
+                               const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO);
                                bio_put(bio);
                                rdev = conf->mirrors[mirror].rdev;
                                if (printk_ratelimit())
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b8a2c5d..826eb34 100644 (file)
@@ -3606,7 +3606,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
        const int rw = bio_data_dir(bi);
        int cpu, remaining;
 
-       if (unlikely(bio_barrier(bi))) {
+       if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
                bio_endio(bi, -EOPNOTSUPP);
                return 0;
        }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 662024d..5987da8 100644 (file)
@@ -898,8 +898,10 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                                scsi_print_sense("", cmd);
                        scsi_print_command(cmd);
                }
-               blk_end_request_all(req, -EIO);
-               scsi_next_command(cmd);
+               if (blk_end_request_err(req, -EIO))
+                       scsi_requeue_command(q, cmd);
+               else
+                       scsi_next_command(cmd);
                break;
        case ACTION_REPREP:
                /* Unprep the request and put it back at the head of the queue.
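
scsi_io_completion() no longer throws away a whole merged request on a hard error: blk_end_request_err(), added in the blkdev.h hunk below, completes the request up to its next error boundary (blk_rq_err_bytes()) with the error and returns true if buffers remain pending, in which case the surviving tail is requeued. A minimal sketch of the same idiom in a generic driver; example_fail_request is illustrative and assumes the caller does not already hold the queue lock:

	#include <linux/blkdev.h>

	/* Sketch: fail only the part that must fail, retry the rest. */
	static void example_fail_request(struct request_queue *q, struct request *rq)
	{
		unsigned long flags;

		if (blk_end_request_err(rq, -EIO)) {
			/* bytes beyond the error boundary are still pending */
			spin_lock_irqsave(q->queue_lock, flags);
			blk_requeue_request(q, rq);
			spin_unlock_irqrestore(q->queue_lock, flags);
		}
	}
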
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
index 8472418..ac85773 100644 (file)
@@ -112,8 +112,9 @@ static int dst_request(struct request_queue *q, struct bio *bio)
                 * I worked with.
                 *
                 * Empty barriers are not allowed anyway, see 51fd77bd9f512
-                * for example, although later it was changed to bio_discard()
-                * only, which does not work in this case.
+                * for example, although later it was changed to
+                * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not
+                * work in this case.
                 */
                //err = -EOPNOTSUPP;
                err = 0;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c..535f85b 100644 (file)
@@ -1511,7 +1511,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 static void btrfs_issue_discard(struct block_device *bdev,
                                u64 start, u64 len)
 {
-       blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+       blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
+                            DISCARD_FL_BARRIER);
 }
 #endif
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5dbefd1..5cf405b 100644 (file)
@@ -260,7 +260,7 @@ loop_lock:
                num_run++;
                batch_run++;
 
-               if (bio_sync(cur))
+               if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
                        num_sync_run++;
 
                if (need_resched()) {
@@ -2903,7 +2903,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
        bio->bi_rw |= rw;
 
        spin_lock(&device->io_lock);
-       if (bio_sync(bio))
+       if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
                pending_bios = &device->pending_sync_bios;
        else
                pending_bios = &device->pending_bios;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 18d3a28..28c590b 100644 (file)
@@ -857,7 +857,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
                                        goto start_new_extent;
                                if ((start + nr_sects) != blk) {
                                        rv = blkdev_issue_discard(bdev, start,
-                                                           nr_sects, GFP_NOFS);
+                                                           nr_sects, GFP_NOFS,
+                                                           DISCARD_FL_BARRIER);
                                        if (rv)
                                                goto fail;
                                        nr_sects = 0;
@@ -871,7 +872,8 @@ start_new_extent:
                }
        }
        if (nr_sects) {
-               rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS);
+               rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
+                                        DISCARD_FL_BARRIER);
                if (rv)
                        goto fail;
        }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ea4e6cb..619ba99 100644 (file)
@@ -248,11 +248,19 @@ ssize_t part_stat_show(struct device *dev,
                part_stat_read(p, merges[WRITE]),
                (unsigned long long)part_stat_read(p, sectors[WRITE]),
                jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
-               p->in_flight,
+               part_in_flight(p),
                jiffies_to_msecs(part_stat_read(p, io_ticks)),
                jiffies_to_msecs(part_stat_read(p, time_in_queue)));
 }
 
+ssize_t part_inflight_show(struct device *dev,
+                       struct device_attribute *attr, char *buf)
+{
+       struct hd_struct *p = dev_to_part(dev);
+
+       return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
+}
+
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 ssize_t part_fail_show(struct device *dev,
                       struct device_attribute *attr, char *buf)
@@ -281,6 +289,7 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -292,6 +301,7 @@ static struct attribute *part_attrs[] = {
        &dev_attr_size.attr,
        &dev_attr_alignment_offset.attr,
        &dev_attr_stat.attr,
+       &dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
        &dev_attr_fail.attr,
 #endif
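
The stat file keeps its single in-flight column, now computed as the sum via part_in_flight(), while the new inflight attribute exposes the split counters: reading it from a disk's or partition's sysfs directory returns two numbers, in-flight reads followed by in-flight writes, per the "%8u %8u" format above.
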
diff --git a/fs/splice.c b/fs/splice.c
index 8190237..7394e9e 100644 (file)
@@ -502,8 +502,10 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
                len = left;
 
        ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
-       if (ret > 0)
+       if (ret > 0) {
                *ppos += ret;
+               file_accessed(in);
+       }
 
        return ret;
 }
@@ -963,8 +965,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
                mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
                ret = file_remove_suid(out);
-               if (!ret)
+               if (!ret) {
+                       file_update_time(out);
                        ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
+               }
                mutex_unlock(&inode->i_mutex);
        } while (ret > 0);
        splice_from_pipe_end(pipe, &sd);
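
Net effect of the two splice hunks: splicing out of a file now marks it accessed (atime) once data has actually been read, and splicing into a file updates its modification time via file_update_time() under i_mutex, matching ordinary read(2)/write(2) semantics.
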
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2892b71..5be93f1 100644 (file)
@@ -142,56 +142,51 @@ struct bio {
  *
  * bit 0 -- data direction
  *     If not set, bio is a read from device. If set, it's a write to device.
- * bit 1 -- rw-ahead when set
- * bit 2 -- barrier
+ * bit 1 -- fail fast device errors
+ * bit 2 -- fail fast transport errors
+ * bit 3 -- fail fast driver errors
+ * bit 4 -- rw-ahead when set
+ * bit 5 -- barrier
  *     Insert a serialization point in the IO queue, forcing previously
  *     submitted IO to be completed before this one is issued.
- * bit 3 -- synchronous I/O hint.
- * bit 4 -- Unplug the device immediately after submitting this bio.
- * bit 5 -- metadata request
+ * bit 6 -- synchronous I/O hint.
+ * bit 7 -- Unplug the device immediately after submitting this bio.
+ * bit 8 -- metadata request
  *     Used for tracing to differentiate metadata and data IO. May also
  *     get some preferential treatment in the IO scheduler
- * bit 6 -- discard sectors
+ * bit 9 -- discard sectors
  *     Informs the lower level device that this range of sectors is no longer
  *     used by the file system and may thus be freed by the device. Used
  *     for flash based storage.
- * bit 7 -- fail fast device errors
- * bit 8 -- fail fast transport errors
- * bit 9 -- fail fast driver errors
  *     Don't want driver retries for any fast fail whatever the reason.
  * bit 10 -- Tell the IO scheduler not to wait for more requests after this
        one has been submitted, even if it is a SYNC request.
  */
-#define BIO_RW         0       /* Must match RW in req flags (blkdev.h) */
-#define BIO_RW_AHEAD   1       /* Must match FAILFAST in req flags */
-#define BIO_RW_BARRIER 2
-#define BIO_RW_SYNCIO  3
-#define BIO_RW_UNPLUG  4
-#define BIO_RW_META    5
-#define BIO_RW_DISCARD 6
-#define BIO_RW_FAILFAST_DEV            7
-#define BIO_RW_FAILFAST_TRANSPORT      8
-#define BIO_RW_FAILFAST_DRIVER         9
-#define BIO_RW_NOIDLE  10
-
-#define bio_rw_flagged(bio, flag)      ((bio)->bi_rw & (1 << (flag)))
+enum bio_rw_flags {
+       BIO_RW,
+       BIO_RW_FAILFAST_DEV,
+       BIO_RW_FAILFAST_TRANSPORT,
+       BIO_RW_FAILFAST_DRIVER,
+       /* above flags must match REQ_* */
+       BIO_RW_AHEAD,
+       BIO_RW_BARRIER,
+       BIO_RW_SYNCIO,
+       BIO_RW_UNPLUG,
+       BIO_RW_META,
+       BIO_RW_DISCARD,
+       BIO_RW_NOIDLE,
+};
 
 /*
- * Old defines, these should eventually be replaced by direct usage of
- * bio_rw_flagged()
+ * First four bits must match between bio->bi_rw and rq->cmd_flags, make
+ * that explicit here.
  */
-#define bio_barrier(bio)       bio_rw_flagged(bio, BIO_RW_BARRIER)
-#define bio_sync(bio)          bio_rw_flagged(bio, BIO_RW_SYNCIO)
-#define bio_unplug(bio)                bio_rw_flagged(bio, BIO_RW_UNPLUG)
-#define bio_failfast_dev(bio)  bio_rw_flagged(bio, BIO_RW_FAILFAST_DEV)
-#define bio_failfast_transport(bio)    \
-               bio_rw_flagged(bio, BIO_RW_FAILFAST_TRANSPORT)
-#define bio_failfast_driver(bio)       \
-               bio_rw_flagged(bio, BIO_RW_FAILFAST_DRIVER)
-#define bio_rw_ahead(bio)      bio_rw_flagged(bio, BIO_RW_AHEAD)
-#define bio_rw_meta(bio)       bio_rw_flagged(bio, BIO_RW_META)
-#define bio_discard(bio)       bio_rw_flagged(bio, BIO_RW_DISCARD)
-#define bio_noidle(bio)                bio_rw_flagged(bio, BIO_RW_NOIDLE)
+#define BIO_RW_RQ_MASK         0xf
+
+static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag)
+{
+       return (bio->bi_rw & (1 << flag)) != 0;
+}
 
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
@@ -216,7 +211,7 @@ struct bio {
 #define bio_offset(bio)                bio_iovec((bio))->bv_offset
 #define bio_segments(bio)      ((bio)->bi_vcnt - (bio)->bi_idx)
 #define bio_sectors(bio)       ((bio)->bi_size >> 9)
-#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
+#define bio_empty_barrier(bio) (bio_rw_flagged(bio, BIO_RW_BARRIER) && !bio_has_data(bio) && !bio_rw_flagged(bio, BIO_RW_DISCARD))
 
 static inline unsigned int bio_cur_bytes(struct bio *bio)
 {
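
The renumbering above is deliberate: the three failfast flags move to bits 1-3 so they line up with the __REQ_FAILFAST_* bits in rq->cmd_flags (see the blkdev.h hunk below), which is what BIO_RW_RQ_MASK documents. Request setup can then propagate them with a single mask instead of testing each flag; a sketch, using the REQ_FAILFAST_MASK defined below (example_copy_failfast is illustrative):

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	/* Sketch: bits 1-3 of bi_rw and cmd_flags are now identical,
	 * so the failfast state can be copied wholesale. */
	static void example_copy_failfast(struct request *rq, struct bio *bio)
	{
		rq->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
	}
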
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
new file mode 100644 (file)
index 0000000..308734d
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef BLK_IOPOLL_H
+#define BLK_IOPOLL_H
+
+struct blk_iopoll;
+typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
+
+struct blk_iopoll {
+       struct list_head list;
+       unsigned long state;
+       unsigned long data;
+       int weight;
+       int max;
+       blk_iopoll_fn *poll;
+};
+
+enum {
+       IOPOLL_F_SCHED          = 0,
+       IOPOLL_F_DISABLE        = 1,
+};
+
+/*
+ * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
+ * that we were the first to acquire this iop for scheduling. If this iop
+ * is currently disabled, return "failure".
+ */
+static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
+{
+       if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
+               return test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
+
+       return 1;
+}
+
+static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
+{
+       return test_bit(IOPOLL_F_DISABLE, &iop->state);
+}
+
+extern void blk_iopoll_sched(struct blk_iopoll *);
+extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
+extern void blk_iopoll_complete(struct blk_iopoll *);
+extern void __blk_iopoll_complete(struct blk_iopoll *);
+extern void blk_iopoll_enable(struct blk_iopoll *);
+extern void blk_iopoll_disable(struct blk_iopoll *);
+
+extern int blk_iopoll_enabled;
+
+#endif
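
The interface deliberately mirrors NAPI: the interrupt handler claims the instance with blk_iopoll_sched_prep() (0 means we won the IOPOLL_F_SCHED bit), masks further completion interrupts, and calls blk_iopoll_sched(); the poll callback reaps up to its budget and calls blk_iopoll_complete() once it comes in under budget. A minimal sketch of a consumer, in which struct my_hba, the weight of 32, and the commented hardware steps are illustrative assumptions, not part of the patch:

	#include <linux/kernel.h>
	#include <linux/interrupt.h>
	#include <linux/blk-iopoll.h>

	struct my_hba {
		struct blk_iopoll iopoll;
		/* ... device state ... */
	};

	static int my_hba_poll(struct blk_iopoll *iop, int budget)
	{
		struct my_hba *hba = container_of(iop, struct my_hba, iopoll);
		int done = 0;

		/* reap up to 'budget' completions from hba's queue here */

		if (done < budget) {
			blk_iopoll_complete(iop);	/* give up the SCHED bit */
			/* re-enable the completion interrupt here */
		}
		return done;
	}

	static irqreturn_t my_hba_isr(int irq, void *data)
	{
		struct my_hba *hba = data;

		/* 0 from sched_prep means we were first and may schedule */
		if (!blk_iopoll_sched_prep(&hba->iopoll)) {
			/* mask the completion interrupt here */
			blk_iopoll_sched(&hba->iopoll);
		}
		return IRQ_HANDLED;
	}

	static void my_hba_iopoll_setup(struct my_hba *hba)
	{
		blk_iopoll_init(&hba->iopoll, 32, my_hba_poll);
		blk_iopoll_enable(&hba->iopoll);
	}
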
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 69103e0..e23a86c 100644 (file)
@@ -86,13 +86,14 @@ enum {
 };
 
 /*
- * request type modified bits. first two bits match BIO_RW* bits, important
+ * request type modified bits. first four bits match BIO_RW* bits, important
  */
 enum rq_flag_bits {
        __REQ_RW,               /* not set, read. set, write */
        __REQ_FAILFAST_DEV,     /* no driver retries of device errors */
        __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
        __REQ_FAILFAST_DRIVER,  /* no driver retries of driver errors */
+       /* above flags must match BIO_RW_* */
        __REQ_DISCARD,          /* request to discard sectors */
        __REQ_SORTED,           /* elevator knows about this request */
        __REQ_SOFTBARRIER,      /* may not be passed by ioscheduler */
@@ -114,6 +115,7 @@ enum rq_flag_bits {
        __REQ_INTEGRITY,        /* integrity metadata has been remapped */
        __REQ_NOIDLE,           /* Don't anticipate more IO after this one */
        __REQ_IO_STAT,          /* account I/O stat */
+       __REQ_MIXED_MERGE,      /* merge of different types, fail separately */
        __REQ_NR_BITS,          /* stops here */
 };
 
@@ -142,6 +144,10 @@ enum rq_flag_bits {
 #define REQ_INTEGRITY  (1 << __REQ_INTEGRITY)
 #define REQ_NOIDLE     (1 << __REQ_NOIDLE)
 #define REQ_IO_STAT    (1 << __REQ_IO_STAT)
+#define REQ_MIXED_MERGE        (1 << __REQ_MIXED_MERGE)
+
+#define REQ_FAILFAST_MASK      (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \
+                                REQ_FAILFAST_DRIVER)
 
 #define BLK_MAX_CDB    16
 
@@ -453,10 +459,12 @@ struct request_queue
 #define QUEUE_FLAG_NONROT      14      /* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15      /* do IO stats */
+#define QUEUE_FLAG_CQ         16       /* hardware does queuing */
 
 #define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |            \
                                 (1 << QUEUE_FLAG_CLUSTER) |            \
-                                (1 << QUEUE_FLAG_STACKABLE))
+                                (1 << QUEUE_FLAG_STACKABLE)    |       \
+                                (1 << QUEUE_FLAG_SAME_COMP))
 
 static inline int queue_is_locked(struct request_queue *q)
 {
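
Two queue-level additions ride along here: QUEUE_FLAG_CQ, tested through blk_queue_queuing() below, tells the core that the hardware queues commands itself, and QUEUE_FLAG_SAME_COMP joining QUEUE_FLAG_DEFAULT means requests now complete on the submitting CPU by default.
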
@@ -575,6 +583,7 @@ enum {
 
 #define blk_queue_plugged(q)   test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)    test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
+#define blk_queue_queuing(q)   test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
 #define blk_queue_stopped(q)   test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)  test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)    test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
@@ -828,11 +837,13 @@ static inline void blk_run_address_space(struct address_space *mapping)
 }
 
 /*
- * blk_rq_pos()                : the current sector
- * blk_rq_bytes()      : bytes left in the entire request
- * blk_rq_cur_bytes()  : bytes left in the current segment
- * blk_rq_sectors()    : sectors left in the entire request
- * blk_rq_cur_sectors()        : sectors left in the current segment
+ * blk_rq_pos()                        : the current sector
+ * blk_rq_bytes()              : bytes left in the entire request
+ * blk_rq_cur_bytes()          : bytes left in the current segment
+ * blk_rq_err_bytes()          : bytes left till the next error boundary
+ * blk_rq_sectors()            : sectors left in the entire request
+ * blk_rq_cur_sectors()                : sectors left in the current segment
+ * blk_rq_err_sectors()                : sectors left till the next error boundary
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
@@ -849,6 +860,8 @@ static inline int blk_rq_cur_bytes(const struct request *rq)
        return rq->bio ? bio_cur_bytes(rq->bio) : 0;
 }
 
+extern unsigned int blk_rq_err_bytes(const struct request *rq);
+
 static inline unsigned int blk_rq_sectors(const struct request *rq)
 {
        return blk_rq_bytes(rq) >> 9;
@@ -859,6 +872,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
        return blk_rq_cur_bytes(rq) >> 9;
 }
 
+static inline unsigned int blk_rq_err_sectors(const struct request *rq)
+{
+       return blk_rq_err_bytes(rq) >> 9;
+}
+
 /*
  * Request issue related functions.
  */
@@ -885,10 +903,12 @@ extern bool blk_end_request(struct request *rq, int error,
                            unsigned int nr_bytes);
 extern void blk_end_request_all(struct request *rq, int error);
 extern bool blk_end_request_cur(struct request *rq, int error);
+extern bool blk_end_request_err(struct request *rq, int error);
 extern bool __blk_end_request(struct request *rq, int error,
                              unsigned int nr_bytes);
 extern void __blk_end_request_all(struct request *rq, int error);
 extern bool __blk_end_request_cur(struct request *rq, int error);
+extern bool __blk_end_request_err(struct request *rq, int error);
 
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
@@ -915,6 +935,7 @@ extern void blk_queue_alignment_offset(struct request_queue *q,
                                       unsigned int alignment);
 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
 extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
+extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
 extern void blk_set_default_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
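
blk_limits_io_opt() is the queue-less twin of blk_queue_io_opt(): stacking drivers manipulate a bare queue_limits rather than a request_queue, and the wrapper lets them advertise an optimal I/O size all the same. A sketch for a striped device that prefers full-stripe writes; example_set_stripe_opt and its parameters are illustrative:

	#include <linux/blkdev.h>

	/* Sketch: advertise one full stripe (in bytes) as optimal I/O. */
	static void example_set_stripe_opt(struct queue_limits *limits,
					   unsigned int chunk_bytes,
					   unsigned int nr_stripes)
	{
		blk_limits_io_opt(limits, chunk_bytes * nr_stripes);
	}
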
@@ -977,15 +998,18 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
 }
 
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
-extern int blkdev_issue_discard(struct block_device *,
-                               sector_t sector, sector_t nr_sects, gfp_t);
+#define DISCARD_FL_WAIT                0x01    /* wait for completion */
+#define DISCARD_FL_BARRIER     0x02    /* issue DISCARD_BARRIER request */
+extern int blkdev_issue_discard(struct block_device *, sector_t sector,
+               sector_t nr_sects, gfp_t, int flags);
 
 static inline int sb_issue_discard(struct super_block *sb,
                                   sector_t block, sector_t nr_blocks)
 {
        block <<= (sb->s_blocksize_bits - 9);
        nr_blocks <<= (sb->s_blocksize_bits - 9);
-       return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL);
+       return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL,
+                                   DISCARD_FL_BARRIER);
 }
 
 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
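
With the widened signature, behaviour is chosen per call: DISCARD_FL_BARRIER issues the bio as a discard barrier, as every converted call site in this merge does, and DISCARD_FL_WAIT makes blkdev_issue_discard() block until the discard completes instead of returning once it is issued. A sketch of a synchronous caller; example_trim_sync is illustrative:

	#include <linux/blkdev.h>

	/* Sketch: discard a range and wait for the device to finish. */
	static int example_trim_sync(struct block_device *bdev,
				     sector_t sector, sector_t nr_sects)
	{
		return blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
					    DISCARD_FL_WAIT | DISCARD_FL_BARRIER);
	}
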
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 37f5321..b21cf6b 100644 (file)
@@ -161,8 +161,8 @@ struct inodes_stat_t {
  * These aren't really reads or writes, they pass down information about
  * parts of device that are now unused by the file system.
  */
-#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
-#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
+#define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD))
+#define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER))
 
 #define SEL_IN         1
 #define SEL_OUT                2
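
Folding WRITE into both definitions makes discards travel the write path (data direction bit set) instead of masquerading as reads; DISCARD_BARRIER is now just the no-barrier variant plus BIO_RW_BARRIER. A short sketch; example_submit_discard is illustrative:

	#include <linux/fs.h>
	#include <linux/bio.h>

	/* Sketch: the bio is queued as a write + discard + barrier. */
	static void example_submit_discard(struct bio *bio)
	{
		submit_bio(DISCARD_BARRIER, bio);
	}
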
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 45fc320..44263cb 100644 (file)
@@ -98,7 +98,7 @@ struct hd_struct {
        int make_it_fail;
 #endif
        unsigned long stamp;
-       int in_flight;
+       int in_flight[2];
 #ifdef CONFIG_SMP
        struct disk_stats *dkstats;
 #else
@@ -322,18 +322,23 @@ static inline void free_part_stats(struct hd_struct *part)
 #define part_stat_sub(cpu, gendiskp, field, subnd)                     \
        part_stat_add(cpu, gendiskp, field, -subnd)
 
-static inline void part_inc_in_flight(struct hd_struct *part)
+static inline void part_inc_in_flight(struct hd_struct *part, int rw)
 {
-       part->in_flight++;
+       part->in_flight[rw]++;
        if (part->partno)
-               part_to_disk(part)->part0.in_flight++;
+               part_to_disk(part)->part0.in_flight[rw]++;
 }
 
-static inline void part_dec_in_flight(struct hd_struct *part)
+static inline void part_dec_in_flight(struct hd_struct *part, int rw)
 {
-       part->in_flight--;
+       part->in_flight[rw]--;
        if (part->partno)
-               part_to_disk(part)->part0.in_flight--;
+               part_to_disk(part)->part0.in_flight[rw]--;
+}
+
+static inline int part_in_flight(struct hd_struct *part)
+{
+       return part->in_flight[0] + part->in_flight[1];
 }
 
 /* block/blk-core.c */
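
Accounting call sites now pass the request's data direction so reads and writes are tracked independently, while part_in_flight() preserves the combined figure for existing consumers such as part_stat_show() above. A sketch of a call site, assuming the direction comes from rq_data_dir() as in the core accounting code; example_account_start is illustrative:

	#include <linux/genhd.h>
	#include <linux/blkdev.h>

	/* Sketch: the rw index (0 = read, 1 = write) picks the counter. */
	static void example_account_start(struct hd_struct *part, struct request *rq)
	{
		part_inc_in_flight(part, rq_data_dir(rq));
	}
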
@@ -546,6 +551,8 @@ extern ssize_t part_size_show(struct device *dev,
                              struct device_attribute *attr, char *buf);
 extern ssize_t part_stat_show(struct device *dev,
                              struct device_attribute *attr, char *buf);
+extern ssize_t part_inflight_show(struct device *dev,
+                             struct device_attribute *attr, char *buf);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 extern ssize_t part_fail_show(struct device *dev,
                              struct device_attribute *attr, char *buf);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 1ac57e5..8e9e151 100644 (file)
@@ -348,6 +348,7 @@ enum
        NET_TX_SOFTIRQ,
        NET_RX_SOFTIRQ,
        BLOCK_SOFTIRQ,
+       BLOCK_IOPOLL_SOFTIRQ,
        TASKLET_SOFTIRQ,
        SCHED_SOFTIRQ,
        HRTIMER_SOFTIRQ,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3125cff..6bb59f7 100644 (file)
@@ -91,6 +91,7 @@ extern int sysctl_nr_trim_pages;
 #ifdef CONFIG_RCU_TORTURE_TEST
 extern int rcutorture_runnable;
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
+extern int blk_iopoll_enabled;
 
 /* Constants used for minimum and  maximum */
 #ifdef CONFIG_DETECT_SOFTLOCKUP
@@ -997,7 +998,14 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dointvec,
        },
 #endif
-
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "blk_iopoll",
+               .data           = &blk_iopoll_enabled,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
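
The new entry surfaces blk_iopoll_enabled as kernel.blk_iopoll (i.e. /proc/sys/kernel/blk_iopoll, mode 0644), so iopoll can be toggled at runtime by writing 0 or 1 there, with no rebuild or reboot required.
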
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8ffdc0d..74f1102 100644 (file)
@@ -161,7 +161,8 @@ static int discard_swap(struct swap_info_struct *si)
                }
 
                err = blkdev_issue_discard(si->bdev, start_block,
-                                               nr_blocks, GFP_KERNEL);
+                                               nr_blocks, GFP_KERNEL,
+                                               DISCARD_FL_BARRIER);
                if (err)
                        break;
 
@@ -200,7 +201,8 @@ static void discard_swap_cluster(struct swap_info_struct *si,
                        start_block <<= PAGE_SHIFT - 9;
                        nr_blocks <<= PAGE_SHIFT - 9;
                        if (blkdev_issue_discard(si->bdev, start_block,
-                                                       nr_blocks, GFP_NOIO))
+                                                       nr_blocks, GFP_NOIO,
+                                                       DISCARD_FL_BARRIER))
                                break;
                }