#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
-#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
#include <trace/events/block.h>
#include "blk.h"
+#include "blk-cgroup.h"
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
/*
* Merge hash stuff.
*/
-static const int elv_hash_shift = 6;
-#define ELV_HASH_BLOCK(sec) ((sec) >> 3)
-#define ELV_HASH_FN(sec) \
- (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
-#define ELV_HASH_ENTRIES (1 << elv_hash_shift)
-#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
-#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
+#define rq_hash_key(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq))
/*
* Query io scheduler to see if the current process issuing bio may be
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_allow_merge_fn)
- return e->ops->elevator_allow_merge_fn(q, rq, bio);
+ if (e->type->ops.elevator_allow_merge_fn)
+ return e->type->ops.elevator_allow_merge_fn(q, rq, bio);
return 1;
}
/*
* can we safely merge with this request?
*/
-int elv_rq_merge_ok(struct request *rq, struct bio *bio)
+bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
- if (!rq_mergeable(rq))
- return 0;
-
- /*
- * Don't merge file system requests and discard requests
- */
- if (bio_discard(bio) != bio_discard(rq->bio))
- return 0;
-
- /*
- * different data direction or already started, don't merge
- */
- if (bio_data_dir(bio) != rq_data_dir(rq))
- return 0;
-
- /*
- * must be same device and not a special request
- */
- if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
- return 0;
-
- /*
- * only merge integrity protected bio into ditto rq
- */
- if (bio_integrity(bio) != blk_integrity_rq(rq))
+ if (!blk_rq_merge_ok(rq, bio))
return 0;
if (!elv_iosched_allow_merge(rq, bio))
}
EXPORT_SYMBOL(elv_rq_merge_ok);
-static inline int elv_try_merge(struct request *__rq, struct bio *bio)
-{
- int ret = ELEVATOR_NO_MERGE;
-
- /*
- * we can merge and sequence is ok, check if it's possible
- */
- if (elv_rq_merge_ok(__rq, bio)) {
- if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
- ret = ELEVATOR_BACK_MERGE;
- else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
- ret = ELEVATOR_FRONT_MERGE;
- }
-
- return ret;
-}
-
static struct elevator_type *elevator_find(const char *name)
{
struct elevator_type *e;
module_put(e->elevator_owner);
}
-static struct elevator_type *elevator_get(const char *name)
+static struct elevator_type *elevator_get(const char *name, bool try_loading)
{
struct elevator_type *e;
spin_lock(&elv_list_lock);
e = elevator_find(name);
- if (!e) {
- char elv[ELV_NAME_MAX + strlen("-iosched")];
-
+ if (!e && try_loading) {
spin_unlock(&elv_list_lock);
-
- if (!strcmp(name, "anticipatory"))
- sprintf(elv, "as-iosched");
- else
- sprintf(elv, "%s-iosched", name);
-
- request_module("%s", elv);
+ request_module("%s-iosched", name);
spin_lock(&elv_list_lock);
e = elevator_find(name);
}
return e;
}
-static void *elevator_init_queue(struct request_queue *q,
- struct elevator_queue *eq)
-{
- return eq->ops->elevator_init_fn(q);
-}
-
-static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
- void *data)
-{
- q->elevator = eq;
- eq->elevator_data = data;
-}
-
-static char chosen_elevator[16];
+static char chosen_elevator[ELV_NAME_MAX];
static int __init elevator_setup(char *str)
{
* Be backwards-compatible with previous kernels, so users
* won't get the wrong elevator.
*/
- if (!strcmp(str, "as"))
- strcpy(chosen_elevator, "anticipatory");
- else
- strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
+ strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
return 1;
}
__setup("elevator=", elevator_setup);
+/* called during boot to load the elevator chosen by the elevator param */
+void __init load_default_elevator_module(void)
+{
+ struct elevator_type *e;
+
+ if (!chosen_elevator[0])
+ return;
+
+ spin_lock(&elv_list_lock);
+ e = elevator_find(chosen_elevator);
+ spin_unlock(&elv_list_lock);
+
+ if (!e)
+ request_module("%s-iosched", chosen_elevator);
+}
+
static struct kobj_type elv_ktype;
-static struct elevator_queue *elevator_alloc(struct request_queue *q,
+struct elevator_queue *elevator_alloc(struct request_queue *q,
struct elevator_type *e)
{
struct elevator_queue *eq;
- int i;
eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
if (unlikely(!eq))
goto err;
- eq->ops = &e->ops;
- eq->elevator_type = e;
+ eq->type = e;
kobject_init(&eq->kobj, &elv_ktype);
mutex_init(&eq->sysfs_lock);
-
- eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
- GFP_KERNEL, q->node);
- if (!eq->hash)
- goto err;
-
- for (i = 0; i < ELV_HASH_ENTRIES; i++)
- INIT_HLIST_HEAD(&eq->hash[i]);
+ hash_init(eq->hash);
return eq;
err:
elevator_put(e);
return NULL;
}
+EXPORT_SYMBOL(elevator_alloc);
static void elevator_release(struct kobject *kobj)
{
struct elevator_queue *e;
e = container_of(kobj, struct elevator_queue, kobj);
- elevator_put(e->elevator_type);
- kfree(e->hash);
+ elevator_put(e->type);
kfree(e);
}
int elevator_init(struct request_queue *q, char *name)
{
struct elevator_type *e = NULL;
- struct elevator_queue *eq;
- int ret = 0;
- void *data;
+ int err;
+
+ /*
+ * q->sysfs_lock must be held to provide mutual exclusion between
+ * elevator_switch() and here.
+ */
+ lockdep_assert_held(&q->sysfs_lock);
+
+ if (unlikely(q->elevator))
+ return 0;
INIT_LIST_HEAD(&q->queue_head);
q->last_merge = NULL;
q->boundary_rq = NULL;
if (name) {
- e = elevator_get(name);
+ e = elevator_get(name, true);
if (!e)
return -EINVAL;
}
+ /*
+ * Use the default elevator specified by config boot param or
+ * config option. Don't try to load modules as we could be running
+ * off async and request_module() isn't allowed from async.
+ */
if (!e && *chosen_elevator) {
- e = elevator_get(chosen_elevator);
+ e = elevator_get(chosen_elevator, false);
if (!e)
printk(KERN_ERR "I/O scheduler %s not found\n",
chosen_elevator);
}
if (!e) {
- e = elevator_get(CONFIG_DEFAULT_IOSCHED);
+ e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
if (!e) {
printk(KERN_ERR
"Default I/O scheduler not found. " \
"Using noop.\n");
- e = elevator_get("noop");
+ e = elevator_get("noop", false);
}
}
- eq = elevator_alloc(q, e);
- if (!eq)
- return -ENOMEM;
-
- data = elevator_init_queue(q, eq);
- if (!data) {
- kobject_put(&eq->kobj);
- return -ENOMEM;
- }
-
- elevator_attach(q, eq, data);
- return ret;
+ err = e->ops.elevator_init_fn(q, e);
+ return 0;
}
EXPORT_SYMBOL(elevator_init);
void elevator_exit(struct elevator_queue *e)
{
mutex_lock(&e->sysfs_lock);
- if (e->ops->elevator_exit_fn)
- e->ops->elevator_exit_fn(e);
- e->ops = NULL;
+ if (e->type->ops.elevator_exit_fn)
+ e->type->ops.elevator_exit_fn(e);
mutex_unlock(&e->sysfs_lock);
kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);
-static void elv_activate_rq(struct request_queue *q, struct request *rq)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e->ops->elevator_activate_req_fn)
- e->ops->elevator_activate_req_fn(q, rq);
-}
-
-static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e->ops->elevator_deactivate_req_fn)
- e->ops->elevator_deactivate_req_fn(q, rq);
-}
-
static inline void __elv_rqhash_del(struct request *rq)
{
- hlist_del_init(&rq->hash);
+ hash_del(&rq->hash);
}
static void elv_rqhash_del(struct request_queue *q, struct request *rq)
struct elevator_queue *e = q->elevator;
BUG_ON(ELV_ON_HASH(rq));
- hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
+ hash_add(e->hash, &rq->hash, rq_hash_key(rq));
}
static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
struct elevator_queue *e = q->elevator;
- struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
- struct hlist_node *entry, *next;
+ struct hlist_node *next;
struct request *rq;
- hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+ hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
BUG_ON(!ELV_ON_HASH(rq));
if (unlikely(!rq_mergeable(rq))) {
* RB-tree support functions for inserting/lookup/removal of requests
* in a sorted RB tree.
*/
-struct request *elv_rb_add(struct rb_root *root, struct request *rq)
+void elv_rb_add(struct rb_root *root, struct request *rq)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
parent = *p;
__rq = rb_entry(parent, struct request, rb_node);
- if (rq->sector < __rq->sector)
+ if (blk_rq_pos(rq) < blk_rq_pos(__rq))
p = &(*p)->rb_left;
- else if (rq->sector > __rq->sector)
+ else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
p = &(*p)->rb_right;
- else
- return __rq;
}
rb_link_node(&rq->rb_node, parent, p);
rb_insert_color(&rq->rb_node, root);
- return NULL;
}
EXPORT_SYMBOL(elv_rb_add);
while (n) {
rq = rb_entry(n, struct request, rb_node);
- if (sector < rq->sector)
+ if (sector < blk_rq_pos(rq))
n = n->rb_left;
- else if (sector > rq->sector)
+ else if (sector > blk_rq_pos(rq))
n = n->rb_right;
else
return rq;
q->nr_sorted--;
boundary = q->end_sector;
- stop_flags = REQ_SOFTBARRIER | REQ_HARDBARRIER | REQ_STARTED;
+ stop_flags = REQ_SOFTBARRIER | REQ_STARTED;
list_for_each_prev(entry, &q->queue_head) {
struct request *pos = list_entry_rq(entry);
- if (blk_discard_rq(rq) != blk_discard_rq(pos))
+ if ((rq->cmd_flags & REQ_DISCARD) !=
+ (pos->cmd_flags & REQ_DISCARD))
break;
if (rq_data_dir(rq) != rq_data_dir(pos))
break;
if (pos->cmd_flags & stop_flags)
break;
- if (rq->sector >= boundary) {
- if (pos->sector < boundary)
+ if (blk_rq_pos(rq) >= boundary) {
+ if (blk_rq_pos(pos) < boundary)
continue;
} else {
- if (pos->sector >= boundary)
+ if (blk_rq_pos(pos) >= boundary)
break;
}
- if (rq->sector >= pos->sector)
+ if (blk_rq_pos(rq) >= blk_rq_pos(pos))
break;
}
struct request *__rq;
int ret;
+ /*
+ * Levels of merges:
+ * nomerges: No merges at all attempted
+ * noxmerges: Only simple one-hit cache try
+ * merges: All merge tries attempted
+ */
+ if (blk_queue_nomerges(q))
+ return ELEVATOR_NO_MERGE;
+
/*
* First try one-hit cache.
*/
- if (q->last_merge) {
- ret = elv_try_merge(q->last_merge, bio);
+ if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
+ ret = blk_try_merge(q->last_merge, bio);
if (ret != ELEVATOR_NO_MERGE) {
*req = q->last_merge;
return ret;
}
}
- if (blk_queue_nomerges(q))
+ if (blk_queue_noxmerges(q))
return ELEVATOR_NO_MERGE;
/*
return ELEVATOR_BACK_MERGE;
}
- if (e->ops->elevator_merge_fn)
- return e->ops->elevator_merge_fn(q, req, bio);
+ if (e->type->ops.elevator_merge_fn)
+ return e->type->ops.elevator_merge_fn(q, req, bio);
return ELEVATOR_NO_MERGE;
}
+/*
+ * Attempt to do an insertion back merge. Only check for the case where
+ * we can append 'rq' to an existing request, so we can throw 'rq' away
+ * afterwards.
+ *
+ * Returns true if we merged, false otherwise
+ */
+static bool elv_attempt_insert_merge(struct request_queue *q,
+ struct request *rq)
+{
+ struct request *__rq;
+ bool ret;
+
+ if (blk_queue_nomerges(q))
+ return false;
+
+ /*
+ * First try one-hit cache.
+ */
+ if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
+ return true;
+
+ if (blk_queue_noxmerges(q))
+ return false;
+
+ ret = false;
+ /*
+ * See if our hash lookup can find a potential backmerge.
+ */
+ while (1) {
+ __rq = elv_rqhash_find(q, blk_rq_pos(rq));
+ if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
+ break;
+
+ /* The merged request could be merged with others, try again */
+ ret = true;
+ rq = __rq;
+ }
+
+ return ret;
+}
+
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_merged_fn)
- e->ops->elevator_merged_fn(q, rq, type);
+ if (e->type->ops.elevator_merged_fn)
+ e->type->ops.elevator_merged_fn(q, rq, type);
if (type == ELEVATOR_BACK_MERGE)
elv_rqhash_reposition(q, rq);
struct request *next)
{
struct elevator_queue *e = q->elevator;
+ const int next_sorted = next->cmd_flags & REQ_SORTED;
- if (e->ops->elevator_merge_req_fn)
- e->ops->elevator_merge_req_fn(q, rq, next);
+ if (next_sorted && e->type->ops.elevator_merge_req_fn)
+ e->type->ops.elevator_merge_req_fn(q, rq, next);
elv_rqhash_reposition(q, rq);
- elv_rqhash_del(q, next);
- q->nr_sorted--;
+ if (next_sorted) {
+ elv_rqhash_del(q, next);
+ q->nr_sorted--;
+ }
+
q->last_merge = rq;
}
+void elv_bio_merged(struct request_queue *q, struct request *rq,
+ struct bio *bio)
+{
+ struct elevator_queue *e = q->elevator;
+
+ if (e->type->ops.elevator_bio_merged_fn)
+ e->type->ops.elevator_bio_merged_fn(q, rq, bio);
+}
+
+#ifdef CONFIG_PM_RUNTIME
+static void blk_pm_requeue_request(struct request *rq)
+{
+ if (rq->q->dev && !(rq->cmd_flags & REQ_PM))
+ rq->q->nr_pending--;
+}
+
+static void blk_pm_add_request(struct request_queue *q, struct request *rq)
+{
+ if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 &&
+ (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
+ pm_request_resume(q->dev);
+}
+#else
+static inline void blk_pm_requeue_request(struct request *rq) {}
+static inline void blk_pm_add_request(struct request_queue *q,
+ struct request *rq)
+{
+}
+#endif
+
void elv_requeue_request(struct request_queue *q, struct request *rq)
{
/*
* in_flight count again
*/
if (blk_account_rq(rq)) {
- q->in_flight--;
- if (blk_sorted_rq(rq))
+ q->in_flight[rq_is_sync(rq)]--;
+ if (rq->cmd_flags & REQ_SORTED)
elv_deactivate_rq(q, rq);
}
rq->cmd_flags &= ~REQ_STARTED;
- elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
+ blk_pm_requeue_request(rq);
+
+ __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
}
void elv_drain_elevator(struct request_queue *q)
{
static int printed;
- while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+
+ lockdep_assert_held(q->queue_lock);
+
+ while (q->elevator->type->ops.elevator_dispatch_fn(q, 1))
;
- if (q->nr_sorted == 0)
- return;
- if (printed++ < 10) {
+ if (q->nr_sorted && printed++ < 10) {
printk(KERN_ERR "%s: forced dispatching is broken "
"(nr_sorted=%u), please report this\n",
- q->elevator->elevator_type->elevator_name, q->nr_sorted);
- }
-}
-
-/*
- * Call with queue lock held, interrupts disabled
- */
-void elv_quiesce_start(struct request_queue *q)
-{
- queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
-
- /*
- * make sure we don't have any requests in flight
- */
- elv_drain_elevator(q);
- while (q->rq.elvpriv) {
- blk_start_queueing(q);
- spin_unlock_irq(q->queue_lock);
- msleep(10);
- spin_lock_irq(q->queue_lock);
- elv_drain_elevator(q);
+ q->elevator->type->elevator_name, q->nr_sorted);
}
}
-void elv_quiesce_end(struct request_queue *q)
-{
- queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
-}
-
-void elv_insert(struct request_queue *q, struct request *rq, int where)
+void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
- struct list_head *pos;
- unsigned ordseq;
- int unplug_it = 1;
-
trace_block_rq_insert(q, rq);
+ blk_pm_add_request(q, rq);
+
rq->q = q;
+ if (rq->cmd_flags & REQ_SOFTBARRIER) {
+ /* barriers are scheduling boundary, update end_sector */
+ if (rq->cmd_type == REQ_TYPE_FS) {
+ q->end_sector = rq_end_sector(rq);
+ q->boundary_rq = rq;
+ }
+ } else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
+ (where == ELEVATOR_INSERT_SORT ||
+ where == ELEVATOR_INSERT_SORT_MERGE))
+ where = ELEVATOR_INSERT_BACK;
+
switch (where) {
+ case ELEVATOR_INSERT_REQUEUE:
case ELEVATOR_INSERT_FRONT:
rq->cmd_flags |= REQ_SOFTBARRIER;
-
list_add(&rq->queuelist, &q->queue_head);
break;
* with anything. There's no point in delaying queue
* processing.
*/
- blk_remove_plug(q);
- blk_start_queueing(q);
+ __blk_run_queue(q);
break;
+ case ELEVATOR_INSERT_SORT_MERGE:
+ /*
+ * If we succeed in merging this request with one in the
+ * queue already, we are done - rq has now been freed,
+ * so no need to do anything further.
+ */
+ if (elv_attempt_insert_merge(q, rq))
+ break;
case ELEVATOR_INSERT_SORT:
- BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
+ BUG_ON(rq->cmd_type != REQ_TYPE_FS);
rq->cmd_flags |= REQ_SORTED;
q->nr_sorted++;
if (rq_mergeable(rq)) {
* rq cannot be accessed after calling
* elevator_add_req_fn.
*/
- q->elevator->ops->elevator_add_req_fn(q, rq);
+ q->elevator->type->ops.elevator_add_req_fn(q, rq);
break;
- case ELEVATOR_INSERT_REQUEUE:
- /*
- * If ordered flush isn't in progress, we do front
- * insertion; otherwise, requests should be requeued
- * in ordseq order.
- */
+ case ELEVATOR_INSERT_FLUSH:
rq->cmd_flags |= REQ_SOFTBARRIER;
-
- /*
- * Most requeues happen because of a busy condition,
- * don't force unplug of the queue for that case.
- */
- unplug_it = 0;
-
- if (q->ordseq == 0) {
- list_add(&rq->queuelist, &q->queue_head);
- break;
- }
-
- ordseq = blk_ordered_req_seq(rq);
-
- list_for_each(pos, &q->queue_head) {
- struct request *pos_rq = list_entry_rq(pos);
- if (ordseq <= blk_ordered_req_seq(pos_rq))
- break;
- }
-
- list_add_tail(&rq->queuelist, pos);
+ blk_insert_flush(rq);
break;
-
default:
printk(KERN_ERR "%s: bad insertion point %d\n",
__func__, where);
BUG();
}
-
- if (unplug_it && blk_queue_plugged(q)) {
- int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
- - q->in_flight;
-
- if (nrq >= q->unplug_thresh)
- __generic_unplug_device(q);
- }
-}
-
-void __elv_add_request(struct request_queue *q, struct request *rq, int where,
- int plug)
-{
- if (q->ordcolor)
- rq->cmd_flags |= REQ_ORDERED_COLOR;
-
- if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
- /*
- * toggle ordered color
- */
- if (blk_barrier_rq(rq))
- q->ordcolor ^= 1;
-
- /*
- * barriers implicitly indicate back insertion
- */
- if (where == ELEVATOR_INSERT_SORT)
- where = ELEVATOR_INSERT_BACK;
-
- /*
- * this request is scheduling boundary, update
- * end_sector
- */
- if (blk_fs_request(rq) || blk_discard_rq(rq)) {
- q->end_sector = rq_end_sector(rq);
- q->boundary_rq = rq;
- }
- } else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
- where == ELEVATOR_INSERT_SORT)
- where = ELEVATOR_INSERT_BACK;
-
- if (plug)
- blk_plug_device(q);
-
- elv_insert(q, rq, where);
}
EXPORT_SYMBOL(__elv_add_request);
-void elv_add_request(struct request_queue *q, struct request *rq, int where,
- int plug)
+void elv_add_request(struct request_queue *q, struct request *rq, int where)
{
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- __elv_add_request(q, rq, where, plug);
+ __elv_add_request(q, rq, where);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(elv_add_request);
-static inline struct request *__elv_next_request(struct request_queue *q)
-{
- struct request *rq;
-
- while (1) {
- while (!list_empty(&q->queue_head)) {
- rq = list_entry_rq(q->queue_head.next);
- if (blk_do_ordered(q, &rq))
- return rq;
- }
-
- if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
- return NULL;
- }
-}
-
-struct request *elv_next_request(struct request_queue *q)
-{
- struct request *rq;
- int ret;
-
- while ((rq = __elv_next_request(q)) != NULL) {
- if (!(rq->cmd_flags & REQ_STARTED)) {
- /*
- * This is the first time the device driver
- * sees this request (possibly after
- * requeueing). Notify IO scheduler.
- */
- if (blk_sorted_rq(rq))
- elv_activate_rq(q, rq);
-
- /*
- * just mark as started even if we don't start
- * it, a request that has been delayed should
- * not be passed by new incoming requests
- */
- rq->cmd_flags |= REQ_STARTED;
- trace_block_rq_issue(q, rq);
- }
-
- if (!q->boundary_rq || q->boundary_rq == rq) {
- q->end_sector = rq_end_sector(rq);
- q->boundary_rq = NULL;
- }
-
- if (rq->cmd_flags & REQ_DONTPREP)
- break;
-
- if (q->dma_drain_size && rq->data_len) {
- /*
- * make sure space for the drain appears we
- * know we can do this because max_hw_segments
- * has been adjusted to be one fewer than the
- * device can handle
- */
- rq->nr_phys_segments++;
- }
-
- if (!q->prep_rq_fn)
- break;
-
- ret = q->prep_rq_fn(q, rq);
- if (ret == BLKPREP_OK) {
- break;
- } else if (ret == BLKPREP_DEFER) {
- /*
- * the request may have been (partially) prepped.
- * we need to keep this request in the front to
- * avoid resource deadlock. REQ_STARTED will
- * prevent other fs requests from passing this one.
- */
- if (q->dma_drain_size && rq->data_len &&
- !(rq->cmd_flags & REQ_DONTPREP)) {
- /*
- * remove the space for the drain we added
- * so that we don't add it again
- */
- --rq->nr_phys_segments;
- }
-
- rq = NULL;
- break;
- } else if (ret == BLKPREP_KILL) {
- rq->cmd_flags |= REQ_QUIET;
- __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
- } else {
- printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
- break;
- }
- }
-
- return rq;
-}
-EXPORT_SYMBOL(elv_next_request);
-
-void elv_dequeue_request(struct request_queue *q, struct request *rq)
-{
- BUG_ON(list_empty(&rq->queuelist));
- BUG_ON(ELV_ON_HASH(rq));
-
- list_del_init(&rq->queuelist);
-
- /*
- * the time frame between a request being removed from the lists
- * and to it is freed is accounted as io that is in progress at
- * the driver side.
- */
- if (blk_account_rq(rq))
- q->in_flight++;
-}
-
-int elv_queue_empty(struct request_queue *q)
-{
- struct elevator_queue *e = q->elevator;
-
- if (!list_empty(&q->queue_head))
- return 0;
-
- if (e->ops->elevator_queue_empty_fn)
- return e->ops->elevator_queue_empty_fn(q);
-
- return 1;
-}
-EXPORT_SYMBOL(elv_queue_empty);
-
struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_latter_req_fn)
- return e->ops->elevator_latter_req_fn(q, rq);
+ if (e->type->ops.elevator_latter_req_fn)
+ return e->type->ops.elevator_latter_req_fn(q, rq);
return NULL;
}
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_former_req_fn)
- return e->ops->elevator_former_req_fn(q, rq);
+ if (e->type->ops.elevator_former_req_fn)
+ return e->type->ops.elevator_former_req_fn(q, rq);
return NULL;
}
-int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
+int elv_set_request(struct request_queue *q, struct request *rq,
+ struct bio *bio, gfp_t gfp_mask)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_set_req_fn)
- return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
-
- rq->elevator_private = NULL;
+ if (e->type->ops.elevator_set_req_fn)
+ return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask);
return 0;
}
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_put_req_fn)
- e->ops->elevator_put_req_fn(rq);
+ if (e->type->ops.elevator_put_req_fn)
+ e->type->ops.elevator_put_req_fn(rq);
}
int elv_may_queue(struct request_queue *q, int rw)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_may_queue_fn)
- return e->ops->elevator_may_queue_fn(q, rw);
+ if (e->type->ops.elevator_may_queue_fn)
+ return e->type->ops.elevator_may_queue_fn(q, rw);
return ELV_MQUEUE_MAY;
}
{
struct request *rq;
+ blk_abort_flushes(q);
+
while (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
rq->cmd_flags |= REQ_QUIET;
trace_block_rq_abort(q, rq);
- __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
+ /*
+ * Mark this request as started so we don't trigger
+ * any debug logic in the end I/O path.
+ */
+ blk_start_request(rq);
+ __blk_end_request_all(rq, -EIO);
}
}
EXPORT_SYMBOL(elv_abort_queue);
* request is released from the driver, io must be done
*/
if (blk_account_rq(rq)) {
- q->in_flight--;
- if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
- e->ops->elevator_completed_req_fn(q, rq);
- }
-
- /*
- * Check if the queue is waiting for fs requests to be
- * drained for flush sequence.
- */
- if (unlikely(q->ordseq)) {
- struct request *next = NULL;
-
- if (!list_empty(&q->queue_head))
- next = list_entry_rq(q->queue_head.next);
-
- if (!q->in_flight &&
- blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
- (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
- blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
- blk_start_queueing(q);
- }
+ q->in_flight[rq_is_sync(rq)]--;
+ if ((rq->cmd_flags & REQ_SORTED) &&
+ e->type->ops.elevator_completed_req_fn)
+ e->type->ops.elevator_completed_req_fn(q, rq);
}
}
e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
- error = e->ops ? entry->show(e, page) : -ENOENT;
+ error = e->type ? entry->show(e, page) : -ENOENT;
mutex_unlock(&e->sysfs_lock);
return error;
}
e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
- error = e->ops ? entry->store(e, page, length) : -ENOENT;
+ error = e->type ? entry->store(e, page, length) : -ENOENT;
mutex_unlock(&e->sysfs_lock);
return error;
}
-static struct sysfs_ops elv_sysfs_ops = {
+static const struct sysfs_ops elv_sysfs_ops = {
.show = elv_attr_show,
.store = elv_attr_store,
};
error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
if (!error) {
- struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
+ struct elv_fs_entry *attr = e->type->elevator_attrs;
if (attr) {
while (attr->attr.name) {
if (sysfs_create_file(&e->kobj, &attr->attr))
}
}
kobject_uevent(&e->kobj, KOBJ_ADD);
+ e->registered = 1;
}
return error;
}
-
-static void __elv_unregister_queue(struct elevator_queue *e)
-{
- kobject_uevent(&e->kobj, KOBJ_REMOVE);
- kobject_del(&e->kobj);
-}
+EXPORT_SYMBOL(elv_register_queue);
void elv_unregister_queue(struct request_queue *q)
{
- if (q)
- __elv_unregister_queue(q->elevator);
+ if (q) {
+ struct elevator_queue *e = q->elevator;
+
+ kobject_uevent(&e->kobj, KOBJ_REMOVE);
+ kobject_del(&e->kobj);
+ e->registered = 0;
+ }
}
+EXPORT_SYMBOL(elv_unregister_queue);
-void elv_register(struct elevator_type *e)
+int elv_register(struct elevator_type *e)
{
char *def = "";
+ /* create icq_cache if requested */
+ if (e->icq_size) {
+ if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
+ WARN_ON(e->icq_align < __alignof__(struct io_cq)))
+ return -EINVAL;
+
+ snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
+ "%s_io_cq", e->elevator_name);
+ e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
+ e->icq_align, 0, NULL);
+ if (!e->icq_cache)
+ return -ENOMEM;
+ }
+
+ /* register, don't allow duplicate names */
spin_lock(&elv_list_lock);
- BUG_ON(elevator_find(e->elevator_name));
+ if (elevator_find(e->elevator_name)) {
+ spin_unlock(&elv_list_lock);
+ if (e->icq_cache)
+ kmem_cache_destroy(e->icq_cache);
+ return -EBUSY;
+ }
list_add_tail(&e->list, &elv_list);
spin_unlock(&elv_list_lock);
+ /* print pretty message */
if (!strcmp(e->elevator_name, chosen_elevator) ||
(!*chosen_elevator &&
!strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
def);
+ return 0;
}
EXPORT_SYMBOL_GPL(elv_register);
void elv_unregister(struct elevator_type *e)
{
- struct task_struct *g, *p;
+ /* unregister */
+ spin_lock(&elv_list_lock);
+ list_del_init(&e->list);
+ spin_unlock(&elv_list_lock);
/*
- * Iterate every thread in the process to remove the io contexts.
+ * Destroy icq_cache if it exists. icq's are RCU managed. Make
+ * sure all RCU operations are complete before proceeding.
*/
- if (e->ops.trim) {
- read_lock(&tasklist_lock);
- do_each_thread(g, p) {
- task_lock(p);
- if (p->io_context)
- e->ops.trim(p->io_context);
- task_unlock(p);
- } while_each_thread(g, p);
- read_unlock(&tasklist_lock);
+ if (e->icq_cache) {
+ rcu_barrier();
+ kmem_cache_destroy(e->icq_cache);
+ e->icq_cache = NULL;
}
-
- spin_lock(&elv_list_lock);
- list_del_init(&e->list);
- spin_unlock(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);
*/
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
- struct elevator_queue *old_elevator, *e;
- void *data;
+ struct elevator_queue *old = q->elevator;
+ bool registered = old->registered;
+ int err;
/*
- * Allocate new elevator
+ * Turn on BYPASS and drain all requests w/ elevator private data.
+ * Block layer doesn't call into a quiesced elevator - all requests
+ * are directly put on the dispatch list without elevator data
+ * using INSERT_BACK. All requests have SOFTBARRIER set and no
+ * merge happens either.
*/
- e = elevator_alloc(q, new_e);
- if (!e)
- return 0;
+ blk_queue_bypass_start(q);
- data = elevator_init_queue(q, e);
- if (!data) {
- kobject_put(&e->kobj);
- return 0;
- }
+ /* unregister and clear all auxiliary data of the old elevator */
+ if (registered)
+ elv_unregister_queue(q);
- /*
- * Turn on BYPASS and drain all requests w/ elevator private data
- */
spin_lock_irq(q->queue_lock);
- elv_quiesce_start(q);
-
- /*
- * Remember old elevator.
- */
- old_elevator = q->elevator;
-
- /*
- * attach and start new elevator
- */
- elevator_attach(q, e, data);
-
+ ioc_clear_queue(q);
spin_unlock_irq(q->queue_lock);
- __elv_unregister_queue(old_elevator);
+ /* allocate, init and register new elevator */
+ err = new_e->ops.elevator_init_fn(q, new_e);
+ if (err)
+ goto fail_init;
- if (elv_register_queue(q))
- goto fail_register;
+ if (registered) {
+ err = elv_register_queue(q);
+ if (err)
+ goto fail_register;
+ }
- /*
- * finally exit old elevator and turn off BYPASS.
- */
- elevator_exit(old_elevator);
- spin_lock_irq(q->queue_lock);
- elv_quiesce_end(q);
- spin_unlock_irq(q->queue_lock);
+ /* done, kill the old one and finish */
+ elevator_exit(old);
+ blk_queue_bypass_end(q);
- blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
+ blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
- return 1;
+ return 0;
fail_register:
- /*
- * switch failed, exit the new io scheduler and reattach the old
- * one again (along with re-adding the sysfs dir)
- */
- elevator_exit(e);
- q->elevator = old_elevator;
+ elevator_exit(q->elevator);
+fail_init:
+ /* switch failed, restore and re-register old elevator */
+ q->elevator = old;
elv_register_queue(q);
+ blk_queue_bypass_end(q);
- spin_lock_irq(q->queue_lock);
- queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
- spin_unlock_irq(q->queue_lock);
-
- return 0;
+ return err;
}
-ssize_t elv_iosched_store(struct request_queue *q, const char *name,
- size_t count)
+/*
+ * Switch this queue to the given IO scheduler.
+ */
+static int __elevator_change(struct request_queue *q, const char *name)
{
char elevator_name[ELV_NAME_MAX];
struct elevator_type *e;
- strlcpy(elevator_name, name, sizeof(elevator_name));
- strstrip(elevator_name);
+ if (!q->elevator)
+ return -ENXIO;
- e = elevator_get(elevator_name);
+ strlcpy(elevator_name, name, sizeof(elevator_name));
+ e = elevator_get(strstrip(elevator_name), true);
if (!e) {
printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
return -EINVAL;
}
- if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
+ if (!strcmp(elevator_name, q->elevator->type->elevator_name)) {
elevator_put(e);
- return count;
+ return 0;
}
- if (!elevator_switch(q, e))
- printk(KERN_ERR "elevator: switch to %s failed\n",
- elevator_name);
- return count;
+ return elevator_switch(q, e);
+}
+
+int elevator_change(struct request_queue *q, const char *name)
+{
+ int ret;
+
+ /* Protect q->elevator from elevator_init() */
+ mutex_lock(&q->sysfs_lock);
+ ret = __elevator_change(q, name);
+ mutex_unlock(&q->sysfs_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(elevator_change);
+
+ssize_t elv_iosched_store(struct request_queue *q, const char *name,
+ size_t count)
+{
+ int ret;
+
+ if (!q->elevator)
+ return count;
+
+ ret = __elevator_change(q, name);
+ if (!ret)
+ return count;
+
+ printk(KERN_ERR "elevator: switch to %s failed\n", name);
+ return ret;
}
ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
struct elevator_queue *e = q->elevator;
- struct elevator_type *elv = e->elevator_type;
+ struct elevator_type *elv;
struct elevator_type *__e;
int len = 0;
+ if (!q->elevator || !blk_queue_stackable(q))
+ return sprintf(name, "none\n");
+
+ elv = e->type;
+
spin_lock(&elv_list_lock);
list_for_each_entry(__e, &elv_list, list) {
if (!strcmp(elv->elevator_name, __e->elevator_name))