#include <linux/task_io_accounting_ops.h>
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
-#include <trace/block.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/block.h>
#include "blk.h"
-DEFINE_TRACE(block_plug);
-DEFINE_TRACE(block_unplug_io);
-DEFINE_TRACE(block_unplug_timer);
-DEFINE_TRACE(block_getrq);
-DEFINE_TRACE(block_sleeprq);
-DEFINE_TRACE(block_rq_requeue);
-DEFINE_TRACE(block_bio_backmerge);
-DEFINE_TRACE(block_bio_frontmerge);
-DEFINE_TRACE(block_bio_queue);
-DEFINE_TRACE(block_rq_complete);
-DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */
EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
static int __make_request(struct request_queue *q, struct bio *bio);
* need bouncing, by calling the appropriate masked or flagged allocator,
* suitable for the target device. Otherwise the call to blk_queue_bounce will
* BUG.
+ *
+ * WARNING: When allocating/cloning a bio-chain, careful consideration should be
+ * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
+ * anything but the first bio in the chain. Otherwise you risk waiting for IO
+ * completion of a bio that hasn't been submitted yet, thus resulting in a
+ * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
+ * of bio_alloc(), as that avoids the mempool deadlock.
+ * If possible a big IO should be split into smaller parts when allocation
+ * fails. Partial allocation should not be an error, or you risk a live-lock.
*/
struct request *blk_make_request(struct request_queue *q, struct bio *bio,
gfp_t gfp_mask)
*/
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
- BUG_ON(blk_queued_rq(rq));
-
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
if (blk_rq_tagged(rq))
blk_queue_end_tag(q, rq);
+ BUG_ON(blk_queued_rq(rq));
+
elv_requeue_request(q, rq);
}
EXPORT_SYMBOL(blk_requeue_request);
bio->bi_bdev = bdev->bd_contains;
trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
- bdev->bd_dev, bio->bi_sector,
+ bdev->bd_dev,
bio->bi_sector - p->start_sect);
}
}
goto end_io;
}
- if (unlikely(nr_sectors > q->max_hw_sectors)) {
+ if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
printk(KERN_ERR "bio too big device %s (%u > %u)\n",
- bdevname(bio->bi_bdev, b),
- bio_sectors(bio),
- q->max_hw_sectors);
+ bdevname(bio->bi_bdev, b),
+ bio_sectors(bio),
+ queue_max_hw_sectors(q));
goto end_io;
}
goto end_io;
if (old_sector != -1)
- trace_block_remap(q, bio, old_dev, bio->bi_sector,
- old_sector);
+ trace_block_remap(q, bio, old_dev, old_sector);
trace_block_bio_queue(q, bio);
*/
int blk_rq_check_limits(struct request_queue *q, struct request *rq)
{
- if (blk_rq_sectors(rq) > q->max_sectors ||
- blk_rq_bytes(rq) > q->max_hw_sectors << 9) {
+ if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
+ blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
printk(KERN_ERR "%s: over max size limit.\n", __func__);
return -EIO;
}
* limitation.
*/
blk_recalc_rq_segments(rq);
- if (rq->nr_phys_segments > q->max_phys_segments ||
- rq->nr_phys_segments > q->max_hw_segments) {
+ if (rq->nr_phys_segments > queue_max_phys_segments(q) ||
+ rq->nr_phys_segments > queue_max_hw_segments(q)) {
printk(KERN_ERR "%s: over max segments limit.\n", __func__);
return -EIO;
}
break;
} else if (ret == BLKPREP_KILL) {
rq->cmd_flags |= REQ_QUIET;
+ /*
+ * Mark this request as started so we don't trigger
+ * any debug logic in the end I/O path.
+ */
+ blk_start_request(rq);
__blk_end_request_all(rq, -EIO);
} else {
printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
* the driver side.
*/
if (blk_account_rq(rq))
- q->in_flight++;
+ q->in_flight[rq_is_sync(rq)]++;
}
/**
* resid_len to full count and add the timeout handler.
*/
req->resid_len = blk_rq_bytes(req);
+ if (unlikely(blk_bidi_rq(req)))
+ req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
+
blk_add_timer(req);
}
EXPORT_SYMBOL(blk_start_request);
} else {
int idx = bio->bi_idx + next_idx;
- if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
+ if (unlikely(idx >= bio->bi_vcnt)) {
blk_dump_rq_flags(req, "__end_that");
printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
- __func__, bio->bi_idx, bio->bi_vcnt);
+ __func__, idx, bio->bi_vcnt);
break;
}
*/
static void blk_finish_request(struct request *req, int error)
{
- BUG_ON(blk_queued_rq(req));
-
if (blk_rq_tagged(req))
blk_queue_end_tag(req->q, req);
+ BUG_ON(blk_queued_rq(req));
+
if (unlikely(laptop_mode) && blk_fs_request(req))
laptop_io_completion();
}
EXPORT_SYMBOL_GPL(blk_lld_busy);
+/**
+ * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
+ * @rq: the clone request to be cleaned up
+ *
+ * Description:
+ * Free all bios in @rq for a cloned request.
+ */
+void blk_rq_unprep_clone(struct request *rq)
+{
+ struct bio *bio;
+
+ while ((bio = rq->bio) != NULL) {
+ rq->bio = bio->bi_next;
+
+ bio_put(bio);
+ }
+}
+EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
+
+/*
+ * Copy attributes of the original request to the clone request.
+ * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
+ */
+static void __blk_rq_prep_clone(struct request *dst, struct request *src)
+{
+ dst->cpu = src->cpu;
+ dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
+ dst->cmd_type = src->cmd_type;
+ dst->__sector = blk_rq_pos(src);
+ dst->__data_len = blk_rq_bytes(src);
+ dst->nr_phys_segments = src->nr_phys_segments;
+ dst->ioprio = src->ioprio;
+ dst->extra_len = src->extra_len;
+}
+
+/**
+ * blk_rq_prep_clone - Helper function to setup clone request
+ * @rq: the request to be setup
+ * @rq_src: original request to be cloned
+ * @bs: bio_set that bios for clone are allocated from
+ * @gfp_mask: memory allocation mask for bio
+ * @bio_ctr: setup function to be called for each clone bio.
+ * Returns %0 for success, non %0 for failure.
+ * @data: private data to be passed to @bio_ctr
+ *
+ * Description:
+ * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
+ * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
+ * are not copied, and copying such parts is the caller's responsibility.
+ * Also, pages which the original bios are pointing to are not copied
+ * and the cloned bios just point same pages.
+ * So cloned bios must be completed before original bios, which means
+ * the caller must complete @rq before @rq_src.
+ */
+int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+ struct bio_set *bs, gfp_t gfp_mask,
+ int (*bio_ctr)(struct bio *, struct bio *, void *),
+ void *data)
+{
+ struct bio *bio, *bio_src;
+
+ if (!bs)
+ bs = fs_bio_set;
+
+ blk_rq_init(NULL, rq);
+
+ __rq_for_each_bio(bio_src, rq_src) {
+ bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
+ if (!bio)
+ goto free_and_out;
+
+ __bio_clone(bio, bio_src);
+
+ if (bio_integrity(bio_src) &&
+ bio_integrity_clone(bio, bio_src, gfp_mask))
+ goto free_and_out;
+
+ if (bio_ctr && bio_ctr(bio, bio_src, data))
+ goto free_and_out;
+
+ if (rq->bio) {
+ rq->biotail->bi_next = bio;
+ rq->biotail = bio;
+ } else
+ rq->bio = rq->biotail = bio;
+ }
+
+ __blk_rq_prep_clone(rq, rq_src);
+
+ return 0;
+
+free_and_out:
+ if (bio)
+ bio_free(bio, bs);
+ blk_rq_unprep_clone(rq);
+
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
+
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
{
return queue_work(kblockd_workqueue, work);