blkcg: add blkg_policy_data->plid
[linux-3.10.git] block/blk-core.c
index d2da641..c973249 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -40,6 +40,7 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
 
 DEFINE_IDA(blk_queue_ida);
 
@@ -219,12 +220,13 @@ static void blk_delay_work(struct work_struct *work)
  * Description:
  *   Sometimes queueing needs to be postponed for a little while, to allow
  *   resources to come back. This function will make sure that queueing is
- *   restarted around the specified time.
+ *   restarted around the specified time. Queue lock must be held.
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
-       queue_delayed_work(kblockd_workqueue, &q->delay_work,
-                               msecs_to_jiffies(msecs));
+       if (likely(!blk_queue_dead(q)))
+               queue_delayed_work(kblockd_workqueue, &q->delay_work,
+                                  msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_delay_queue);
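As a usage illustration for the updated kernel-doc above (a minimal sketch, not part of this patch; my_hw_busy() is a hypothetical helper): a request_fn is entered with q->queue_lock held, which is exactly the context blk_delay_queue() now documents and relies on for its blk_queue_dead() test.

/* Hypothetical request_fn: entered with q->queue_lock held and IRQs off. */
static void my_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL) {
                if (my_hw_busy()) {
                        /* Put the request back and let kblockd retry in ~3 ms. */
                        blk_requeue_request(q, rq);
                        blk_delay_queue(q, 3);
                        return;
                }
                __blk_end_request_all(rq, 0);   /* complete without real I/O */
        }
}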
 
@@ -293,6 +295,34 @@ void blk_sync_queue(struct request_queue *q)
 EXPORT_SYMBOL(blk_sync_queue);
 
 /**
+ * __blk_run_queue_uncond - run a queue whether or not it has been stopped
+ * @q: The queue to run
+ *
+ * Description:
+ *    Invoke request handling on a queue if there are any pending requests.
+ *    May be used to restart request handling after a request has completed.
+ *    This variant runs the queue whether or not the queue has been
+ *    stopped. Must be called with the queue lock held and interrupts
+ *    disabled. See also @blk_run_queue.
+ */
+inline void __blk_run_queue_uncond(struct request_queue *q)
+{
+       if (unlikely(blk_queue_dead(q)))
+               return;
+
+       /*
+        * Some request_fn implementations, e.g. scsi_request_fn(), unlock
+        * the queue lock internally. As a result multiple threads may be
+        * running such a request function concurrently. Keep track of the
+        * number of active request_fn invocations such that blk_drain_queue()
+        * can wait until all these request_fn calls have finished.
+        */
+       q->request_fn_active++;
+       q->request_fn(q);
+       q->request_fn_active--;
+}
+
+/**
  * __blk_run_queue - run a single device queue
  * @q: The queue to run
  *
@@ -305,7 +335,7 @@ void __blk_run_queue(struct request_queue *q)
        if (unlikely(blk_queue_stopped(q)))
                return;
 
-       q->request_fn(q);
+       __blk_run_queue_uncond(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
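To illustrate why a plain "queue is empty" test is not enough (a simplified sketch assuming a lock-dropping driver, not actual scsi_request_fn code; my_submit_to_hw() is hypothetical): once a request_fn drops the queue lock, another thread can observe an empty queue while the function is still running, so __blk_drain_queue() below also waits for request_fn_active to reach zero.

/* Simplified lock-dropping request_fn of the kind the comment above describes. */
static void my_lockdrop_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL) {
                spin_unlock_irq(q->queue_lock);
                my_submit_to_hw(rq);    /* hardware submission without the queue lock */
                spin_lock_irq(q->queue_lock);
        }
}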
 
@@ -315,11 +345,11 @@ EXPORT_SYMBOL(__blk_run_queue);
  *
  * Description:
  *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- *    of us.
+ *    of us. The caller must hold the queue lock.
  */
 void blk_run_queue_async(struct request_queue *q)
 {
-       if (likely(!blk_queue_stopped(q)))
+       if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
                mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
 EXPORT_SYMBOL(blk_run_queue_async);
@@ -349,7 +379,7 @@ void blk_put_queue(struct request_queue *q)
 EXPORT_SYMBOL(blk_put_queue);
 
 /**
- * blk_drain_queue - drain requests from request_queue
+ * __blk_drain_queue - drain requests from request_queue
  * @q: queue to drain
  * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
  *
@@ -357,15 +387,17 @@ EXPORT_SYMBOL(blk_put_queue);
  * If not, only ELVPRIV requests are drained.  The caller is responsible
  * for ensuring that no new requests which need to be drained are queued.
  */
-void blk_drain_queue(struct request_queue *q, bool drain_all)
+static void __blk_drain_queue(struct request_queue *q, bool drain_all)
+       __releases(q->queue_lock)
+       __acquires(q->queue_lock)
 {
        int i;
 
+       lockdep_assert_held(q->queue_lock);
+
        while (true) {
                bool drain = false;
 
-               spin_lock_irq(q->queue_lock);
-
                /*
                 * The caller might be trying to drain @q before its
                 * elevator is initialized.
@@ -386,6 +418,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
                        __blk_run_queue(q);
 
                drain |= q->nr_rqs_elvpriv;
+               drain |= q->request_fn_active;
 
                /*
                 * Unfortunately, requests are queued at and tracked from
@@ -401,11 +434,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
                        }
                }
 
-               spin_unlock_irq(q->queue_lock);
-
                if (!drain)
                        break;
+
+               spin_unlock_irq(q->queue_lock);
+
                msleep(10);
+
+               spin_lock_irq(q->queue_lock);
        }
 
        /*
@@ -416,13 +452,9 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
        if (q->request_fn) {
                struct request_list *rl;
 
-               spin_lock_irq(q->queue_lock);
-
                blk_queue_for_each_rl(rl, q)
                        for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
                                wake_up_all(&rl->wait[i]);
-
-               spin_unlock_irq(q->queue_lock);
        }
 }
 
@@ -446,7 +478,10 @@ void blk_queue_bypass_start(struct request_queue *q)
        spin_unlock_irq(q->queue_lock);
 
        if (drain) {
-               blk_drain_queue(q, false);
+               spin_lock_irq(q->queue_lock);
+               __blk_drain_queue(q, false);
+               spin_unlock_irq(q->queue_lock);
+
                /* ensure blk_queue_bypass() is %true inside RCU read lock */
                synchronize_rcu();
        }
@@ -473,20 +508,20 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
  *
- * Mark @q DEAD, drain all pending requests, destroy and put it.  All
- * future requests will be failed immediately with -ENODEV.
+ * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
+ * put it.  All future requests will be failed immediately with -ENODEV.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
        spinlock_t *lock = q->queue_lock;
 
-       /* mark @q DEAD, no new request or merges will be allowed afterwards */
+       /* mark @q DYING, no new request or merges will be allowed afterwards */
        mutex_lock(&q->sysfs_lock);
-       queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
+       queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
        spin_lock_irq(lock);
 
        /*
-        * Dead queue is permanently in bypass mode till released.  Note
+        * A dying queue is permanently in bypass mode till released.  Note
         * that, unlike blk_queue_bypass_start(), we aren't performing
         * synchronize_rcu() after entering bypass mode to avoid the delay
         * as some drivers create and destroy a lot of queues while
@@ -499,12 +534,18 @@ void blk_cleanup_queue(struct request_queue *q)
 
        queue_flag_set(QUEUE_FLAG_NOMERGES, q);
        queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
-       queue_flag_set(QUEUE_FLAG_DEAD, q);
+       queue_flag_set(QUEUE_FLAG_DYING, q);
        spin_unlock_irq(lock);
        mutex_unlock(&q->sysfs_lock);
 
-       /* drain all requests queued before DEAD marking */
-       blk_drain_queue(q, true);
+       /*
+        * Drain all requests queued before DYING marking. Set DEAD flag to
+        * prevent that q->request_fn() gets invoked after draining finished.
+        */
+       spin_lock_irq(lock);
+       __blk_drain_queue(q, true);
+       queue_flag_set(QUEUE_FLAG_DEAD, q);
+       spin_unlock_irq(lock);
 
        /* @q won't process any more request, flush async actions */
        del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
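For orientation, a hedged sketch of the caller side (not part of this patch; my_dev and its fields are hypothetical): drivers keep calling blk_cleanup_queue() exactly as before, and the DYING/DEAD split is internal to it. DYING makes blk_get_queue() and __get_request() below refuse new work while the drain runs; DEAD, set only once __blk_drain_queue() has returned, is what __blk_run_queue_uncond() checks so q->request_fn() can never be entered again.

/* Conventional teardown ordering in a driver's remove path (illustrative). */
static void my_remove(struct my_dev *dev)
{
        del_gendisk(dev->disk);         /* take the disk out of the system */
        blk_cleanup_queue(dev->queue);  /* DYING -> drain -> DEAD -> put   */
        put_disk(dev->disk);
}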
@@ -549,7 +590,7 @@ void blk_exit_rl(struct request_list *rl)
 
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 {
-       return blk_alloc_queue_node(gfp_mask, -1);
+       return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
@@ -606,8 +647,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        /*
         * A queue starts its life with bypass turned on to avoid
         * unnecessary bypass on/off overhead and nasty surprises during
-        * init.  The initial bypass will be finished at the end of
-        * blk_init_allocated_queue().
+        * init.  The initial bypass will be finished when the queue is
+        * registered by blk_register_queue().
         */
        q->bypass_depth = 1;
        __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
@@ -660,7 +701,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
 
 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
 {
-       return blk_init_queue_node(rfn, lock, -1);
+       return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
 }
 EXPORT_SYMBOL(blk_init_queue);
 
@@ -694,7 +735,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
        q->request_fn           = rfn;
        q->prep_rq_fn           = NULL;
        q->unprep_rq_fn         = NULL;
-       q->queue_flags          = QUEUE_FLAG_DEFAULT;
+       q->queue_flags          |= QUEUE_FLAG_DEFAULT;
 
        /* Override internal queue lock with supplied lock pointer */
        if (lock)
@@ -710,18 +751,13 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
        /* init elevator */
        if (elevator_init(q, NULL))
                return NULL;
-
-       blk_queue_congestion_threshold(q);
-
-       /* all done, end the initial bypass */
-       blk_queue_bypass_end(q);
        return q;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
 
 bool blk_get_queue(struct request_queue *q)
 {
-       if (likely(!blk_queue_dead(q))) {
+       if (likely(!blk_queue_dying(q))) {
                __blk_get_queue(q);
                return true;
        }
@@ -875,7 +911,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
        const bool is_sync = rw_is_sync(rw_flags) != 0;
        int may_queue;
 
-       if (unlikely(blk_queue_dead(q)))
+       if (unlikely(blk_queue_dying(q)))
                return NULL;
 
        may_queue = elv_may_queue(q, rw_flags);
@@ -1055,7 +1091,7 @@ retry:
        if (rq)
                return rq;
 
-       if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+       if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
                blk_put_rl(rl);
                return NULL;
        }
@@ -1657,8 +1693,8 @@ generic_make_request_checks(struct bio *bio)
                goto end_io;
        }
 
-       if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
-                    nr_sectors > queue_max_hw_sectors(q))) {
+       if (likely(bio_is_rw(bio) &&
+                  nr_sectors > queue_max_hw_sectors(q))) {
                printk(KERN_ERR "bio too big device %s (%u > %u)\n",
                       bdevname(bio->bi_bdev, b),
                       bio_sectors(bio),
@@ -1699,8 +1735,12 @@ generic_make_request_checks(struct bio *bio)
 
        if ((bio->bi_rw & REQ_DISCARD) &&
            (!blk_queue_discard(q) ||
-            ((bio->bi_rw & REQ_SECURE) &&
-             !blk_queue_secdiscard(q)))) {
+            ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
+               err = -EOPNOTSUPP;
+               goto end_io;
+       }
+
+       if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
                err = -EOPNOTSUPP;
                goto end_io;
        }
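The added test fails WRITE SAME bios early on queues that do not advertise support. A hedged sketch of the caller side, assuming the blkdev_issue_write_same() helper from the same WRITE SAME series (error handling trimmed):

/*
 * Replicate one page of payload across nr_sects sectors of bdev; callers
 * see -EOPNOTSUPP when the device lacks WRITE SAME support.
 */
static int my_write_same(struct block_device *bdev, struct page *page,
                         sector_t sector, sector_t nr_sects)
{
        return blkdev_issue_write_same(bdev, sector, nr_sects, GFP_NOFS, page);
}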
@@ -1810,15 +1850,20 @@ EXPORT_SYMBOL(generic_make_request);
  */
 void submit_bio(int rw, struct bio *bio)
 {
-       int count = bio_sectors(bio);
-
        bio->bi_rw |= rw;
 
        /*
         * If it's a regular read/write or a barrier with data attached,
         * go through the normal accounting stuff before submission.
         */
-       if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
+       if (bio_has_data(bio)) {
+               unsigned int count;
+
+               if (unlikely(rw & REQ_WRITE_SAME))
+                       count = bdev_logical_block_size(bio->bi_bdev) >> 9;
+               else
+                       count = bio_sectors(bio);
+
                if (rw & WRITE) {
                        count_vm_events(PGPGOUT, count);
                } else {
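A worked example of the accounting change above (block size chosen for illustration): a WRITE SAME bio on a device with 4096-byte logical blocks is charged count = 4096 >> 9 = 8 sectors in the vmstat counters, however many sectors the bio actually describes, because only a single block of payload is transferred.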
@@ -1864,11 +1909,10 @@ EXPORT_SYMBOL(submit_bio);
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
-       if (rq->cmd_flags & REQ_DISCARD)
+       if (!rq_mergeable(rq))
                return 0;
 
-       if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
-           blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
+       if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
                printk(KERN_ERR "%s: over max size limit.\n", __func__);
                return -EIO;
        }
@@ -1907,7 +1951,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
                return -EIO;
 
        spin_lock_irqsave(q->queue_lock, flags);
-       if (unlikely(blk_queue_dead(q))) {
+       if (unlikely(blk_queue_dying(q))) {
                spin_unlock_irqrestore(q->queue_lock, flags);
                return -ENODEV;
        }
@@ -2340,7 +2384,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
        req->buffer = bio_data(req->bio);
 
        /* update sector only for requests with clear definition of sector */
-       if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
+       if (req->cmd_type == REQ_TYPE_FS)
                req->__sector += total_bytes >> 9;
 
        /* mixed attributes always follow the first bio */
@@ -2781,16 +2825,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
        blk_rq_init(NULL, rq);
 
        __rq_for_each_bio(bio_src, rq_src) {
-               bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
+               bio = bio_clone_bioset(bio_src, gfp_mask, bs);
                if (!bio)
                        goto free_and_out;
 
-               __bio_clone(bio, bio_src);
-
-               if (bio_integrity(bio_src) &&
-                   bio_integrity_clone(bio, bio_src, gfp_mask, bs))
-                       goto free_and_out;
-
                if (bio_ctr && bio_ctr(bio, bio_src, data))
                        goto free_and_out;
 
@@ -2807,7 +2845,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 
 free_and_out:
        if (bio)
-               bio_free(bio, bs);
+               bio_put(bio);
        blk_rq_unprep_clone(rq);
 
        return -ENOMEM;
@@ -2871,7 +2909,8 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
        struct request *rqa = container_of(a, struct request, queuelist);
        struct request *rqb = container_of(b, struct request, queuelist);
 
-       return !(rqa->q <= rqb->q);
+       return !(rqa->q < rqb->q ||
+               (rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
 }
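For reference on the new ordering (sector values chosen for illustration): plug_rq_cmp() is the comparator blk_flush_plug_list() hands to list_sort(), which keeps a before b when the callback returns 0 and moves a after b when it returns a positive value. With the new expression, two requests on the same queue at sectors 8 and 64 compare as 0 for (8, 64) and 1 for (64, 8), so a plug list is now flushed in ascending sector order within each queue instead of merely being grouped by queue as the old !(rqa->q <= rqb->q) did.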
 
 /*
@@ -2886,27 +2925,11 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 {
        trace_block_unplug(q, depth, !from_schedule);
 
-       /*
-        * Don't mess with dead queue.
-        */
-       if (unlikely(blk_queue_dead(q))) {
-               spin_unlock(q->queue_lock);
-               return;
-       }
-
-       /*
-        * If we are punting this to kblockd, then we can safely drop
-        * the queue_lock before waking kblockd (which needs to take
-        * this lock).
-        */
-       if (from_schedule) {
-               spin_unlock(q->queue_lock);
+       if (from_schedule)
                blk_run_queue_async(q);
-       } else {
+       else
                __blk_run_queue(q);
-               spin_unlock(q->queue_lock);
-       }
-
+       spin_unlock(q->queue_lock);
 }
 
 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -2998,7 +3021,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                /*
                 * Short-circuit if @q is dead
                 */
-               if (unlikely(blk_queue_dead(q))) {
+               if (unlikely(blk_queue_dying(q))) {
                        __blk_end_request_all(rq, -ENODEV);
                        continue;
                }