CFQ: make two functions static

[linux-3.10.git] / block / cfq-iosched.c
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c

index c826ef81c6794c41616d4ac8284a8af0ec82f3b8..2b2d7a9a42ab2e6b33826c09941bd90f3f5544c7 100644 (file)
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -178,6 +178,8 @@ struct cfq_group {
         /* group service_tree key */
         u64 vdisktime;
         unsigned int weight;
+       unsigned int new_weight;
+       bool needs_update;
  
         /* number of cfqq currently on this group */
         int nr_cfqq;
@@ -298,7 +300,9 @@ struct cfq_data {
  
         /* List of cfq groups being managed on this device*/
         struct hlist_head cfqg_list;
-       struct rcu_head rcu;
+
+       /* Number of groups which are on blkcg->blkg_list */
+       unsigned int nr_blkcg_linked_grps;
  };
  
  static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -663,15 +667,11 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
         if (rq2 == NULL)
                 return rq1;
  
-       if (rq_is_sync(rq1) && !rq_is_sync(rq2))
-               return rq1;
-       else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
-               return rq2;
-       if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
-               return rq1;
-       else if ((rq2->cmd_flags & REQ_META) &&
-                !(rq1->cmd_flags & REQ_META))
-               return rq2;
+       if (rq_is_sync(rq1) != rq_is_sync(rq2))
+               return rq_is_sync(rq1) ? rq1 : rq2;
+
+       if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+               return rq1->cmd_flags & REQ_META ? rq1 : rq2;
  
         s1 = blk_rq_pos(rq1);
         s2 = blk_rq_pos(rq2);
@@ -853,7 +853,27 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
  }
  
  static void
-cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_update_group_weight(struct cfq_group *cfqg)
+{
+       BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+       if (cfqg->needs_update) {
+               cfqg->weight = cfqg->new_weight;
+               cfqg->needs_update = false;
+       }
+}
+
+static void
+cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+       BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+
+       cfq_update_group_weight(cfqg);
+       __cfq_group_service_tree_add(st, cfqg);
+       st->total_weight += cfqg->weight;
+}
+
+static void
+cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
  {
         struct cfq_rb_root *st = &cfqd->grp_service_tree;
         struct cfq_group *__cfqg;
@@ -866,7 +886,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
         /*
          * Currently put the group at the end. Later implement something
          * so that groups get lesser vtime based on their weights, so that
-        * if group does not loose all if it was not continously backlogged.
+        * if group does not lose all if it was not continuously backlogged.
          */
         n = rb_last(&st->rb);
         if (n) {
@@ -874,13 +894,19 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
                 cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
         } else
                 cfqg->vdisktime = st->min_vdisktime;
+       cfq_group_service_tree_add(st, cfqg);
+}
  
-       __cfq_group_service_tree_add(st, cfqg);
-       st->total_weight += cfqg->weight;
+static void
+cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+       st->total_weight -= cfqg->weight;
+       if (!RB_EMPTY_NODE(&cfqg->rb_node))
+               cfq_rb_erase(&cfqg->rb_node, st);
  }
  
  static void
-cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
  {
         struct cfq_rb_root *st = &cfqd->grp_service_tree;
  
@@ -892,14 +918,13 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
                 return;
  
         cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
-       st->total_weight -= cfqg->weight;
-       if (!RB_EMPTY_NODE(&cfqg->rb_node))
-               cfq_rb_erase(&cfqg->rb_node, st);
+       cfq_group_service_tree_del(st, cfqg);
         cfqg->saved_workload_slice = 0;
         cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
  }
  
-static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
+static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
+                                               unsigned int *unaccounted_time)
  {
         unsigned int slice_used;
  
@@ -918,8 +943,13 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
                                         1);
         } else {
                 slice_used = jiffies - cfqq->slice_start;
-               if (slice_used > cfqq->allocated_slice)
+               if (slice_used > cfqq->allocated_slice) {
+                       *unaccounted_time = slice_used - cfqq->allocated_slice;
                         slice_used = cfqq->allocated_slice;
+               }
+               if (time_after(cfqq->slice_start, cfqq->dispatch_start))
+                       *unaccounted_time += cfqq->slice_start -
+                                       cfqq->dispatch_start;
         }
  
         return slice_used;
@@ -929,12 +959,12 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
                                 struct cfq_queue *cfqq)
  {
         struct cfq_rb_root *st = &cfqd->grp_service_tree;
-       unsigned int used_sl, charge;
+       unsigned int used_sl, charge, unaccounted_sl = 0;
         int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
                         - cfqg->service_tree_idle.count;
  
         BUG_ON(nr_sync < 0);
-       used_sl = charge = cfq_cfqq_slice_usage(cfqq);
+       used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
  
         if (iops_mode(cfqd))
                 charge = cfqq->slice_dispatch;
@@ -942,9 +972,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
                 charge = cfqq->allocated_slice;
  
         /* Can't update vdisktime while group is on service tree */
-       cfq_rb_erase(&cfqg->rb_node, st);
+       cfq_group_service_tree_del(st, cfqg);
         cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
-       __cfq_group_service_tree_add(st, cfqg);
+       /* If a new weight was requested, update now, off tree */
+       cfq_group_service_tree_add(st, cfqg);
  
         /* This group is being expired. Save the context */
         if (time_after(cfqd->workload_expires, jiffies)) {
@@ -960,7 +991,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
         cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
                         " sect=%u", used_sl, cfqq->slice_dispatch, charge,
                         iops_mode(cfqd), cfqq->nr_sectors);
-       cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
+       cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
+                                         unaccounted_sl);
         cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
  }
  
@@ -972,35 +1004,55 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
         return NULL;
  }
  
-void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
-                                       unsigned int weight)
+static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
+                                         unsigned int weight)
  {
-       cfqg_of_blkg(blkg)->weight = weight;
+       struct cfq_group *cfqg = cfqg_of_blkg(blkg);
+       cfqg->new_weight = weight;
+       cfqg->needs_update = true;
  }
  
-static struct cfq_group *
-cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
+static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
+                       struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
  {
-       struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
-       struct cfq_group *cfqg = NULL;
-       void *key = cfqd;
-       int i, j;
-       struct cfq_rb_root *st;
         struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
         unsigned int major, minor;
  
-       cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
-       if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+       /*
+        * Add group onto cgroup list. It might happen that bdi->dev is
+        * not initialized yet. Initialize this new group without major
+        * and minor info and this info will be filled in once a new thread
+        * comes for IO.
+        */
+       if (bdi->dev) {
                 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               cfqg->blkg.dev = MKDEV(major, minor);
-               goto done;
-       }
-       if (cfqg || !create)
-               goto done;
+               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+                                       (void *)cfqd, MKDEV(major, minor));
+       } else
+               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+                                       (void *)cfqd, 0);
+
+       cfqd->nr_blkcg_linked_grps++;
+       cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+
+       /* Add group on cfqd list */
+       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+}
+
+/*
+ * Should be called from sleepable context, without the request queue lock
+ * held: per cpu stats are allocated dynamically and alloc_percpu needs to
+ * be called from sleepable context.
+ */
+static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+{
+       struct cfq_group *cfqg = NULL;
+       int i, j, ret;
+       struct cfq_rb_root *st;
  
         cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
         if (!cfqg)
-               goto done;
+               return NULL;
  
         for_each_cfqg_st(cfqg, i, j, st)
                 *st = CFQ_RB_ROOT;
@@ -1014,43 +1066,94 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
          */
         cfqg->ref = 1;
  
+       ret = blkio_alloc_blkg_stats(&cfqg->blkg);
+       if (ret) {
+               kfree(cfqg);
+               return NULL;
+       }
+
+       return cfqg;
+}
+
+static struct cfq_group *
+cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+{
+       struct cfq_group *cfqg = NULL;
+       void *key = cfqd;
+       struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+       unsigned int major, minor;
+
         /*
-        * Add group onto cgroup list. It might happen that bdi->dev is
-        * not initialized yet. Initialize this new group without major
-        * and minor info and this info will be filled in once a new thread
-        * comes for IO. See code above.
+        * This is the common case when there are no blkio cgroups.
+        * Avoid lookup in this case
          */
-       if (bdi->dev) {
-               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-                                       MKDEV(major, minor));
-       } else
-               cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-                                       0);
-
-       cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+       if (blkcg == &blkio_root_cgroup)
+               cfqg = &cfqd->root_group;
+       else
+               cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
  
-       /* Add group on cfqd list */
-       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+       if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+               sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+               cfqg->blkg.dev = MKDEV(major, minor);
+       }
  
-done:
         return cfqg;
  }
  
  /*
- * Search for the cfq group current task belongs to. If create = 1, then also
- * create the cfq group if it does not exist. request_queue lock must be held.
+ * Search for the cfq group current task belongs to. request_queue lock must
+ * be held.
   */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
  {
-       struct cgroup *cgroup;
-       struct cfq_group *cfqg = NULL;
+       struct blkio_cgroup *blkcg;
+       struct cfq_group *cfqg = NULL, *__cfqg = NULL;
+       struct request_queue *q = cfqd->queue;
  
         rcu_read_lock();
-       cgroup = task_cgroup(current, blkio_subsys_id);
-       cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create);
-       if (!cfqg && create)
+       blkcg = task_blkio_cgroup(current);
+       cfqg = cfq_find_cfqg(cfqd, blkcg);
+       if (cfqg) {
+               rcu_read_unlock();
+               return cfqg;
+       }
+
+       /*
+        * Need to allocate a group. Allocation of group also needs allocation
+        * of per cpu stats which in-turn takes a mutex() and can block. Hence
+        * we need to drop rcu lock and queue_lock before we call alloc.
+        *
+        * Not taking any queue reference here and assuming that queue is
+        * around by the time we return. CFQ queue allocation code does
+        * the same. It might be racy though.
+        */
+
+       rcu_read_unlock();
+       spin_unlock_irq(q->queue_lock);
+
+       cfqg = cfq_alloc_cfqg(cfqd);
+
+       spin_lock_irq(q->queue_lock);
+
+       rcu_read_lock();
+       blkcg = task_blkio_cgroup(current);
+
+       /*
+        * If some other thread already allocated the group while we were
+        * not holding queue lock, free up the group we just allocated
+        */
+       __cfqg = cfq_find_cfqg(cfqd, blkcg);
+
+       if (__cfqg) {
+               kfree(cfqg);
+               rcu_read_unlock();
+               return __cfqg;
+       }
+
+       if (!cfqg)
                 cfqg = &cfqd->root_group;
+
+       cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
         rcu_read_unlock();
         return cfqg;
  }
@@ -1083,6 +1186,7 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
                 return;
         for_each_cfqg_st(cfqg, i, j, st)
                 BUG_ON(!RB_EMPTY_ROOT(&st->rb));
+       free_percpu(cfqg->blkg.stats_cpu);
         kfree(cfqg);
  }
  
@@ -1130,7 +1234,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
   * it should not be NULL as even if elevator was exiting, cgroup deltion
   * path got to it first.
   */
-void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
  {
         unsigned long  flags;
         struct cfq_data *cfqd = key;
@@ -1141,7 +1245,7 @@ void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
  }
  
  #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
  {
         return &cfqd->root_group;
  }
@@ -1175,7 +1279,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
         struct cfq_rb_root *service_tree;
         int left;
         int new_cfqq = 1;
-       int group_changed = 0;
  
         service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
                                                 cfqq_type(cfqq));
@@ -1246,9 +1349,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
         rb_link_node(&cfqq->rb_node, parent, p);
         rb_insert_color(&cfqq->rb_node, &service_tree->rb);
         service_tree->count++;
-       if ((add_front || !new_cfqq) && !group_changed)
+       if (add_front || !new_cfqq)
                 return;
-       cfq_group_service_tree_add(cfqd, cfqq->cfqg);
+       cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
  }
  
  static struct cfq_queue *
@@ -1361,7 +1464,7 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
                 cfqq->p_root = NULL;
         }
  
-       cfq_group_service_tree_del(cfqd, cfqq->cfqg);
+       cfq_group_notify_queue_del(cfqd, cfqq->cfqg);
         BUG_ON(!cfqd->busy_queues);
         cfqd->busy_queues--;
         if (cfq_cfqq_sync(cfqq))
@@ -1398,16 +1501,11 @@ static void cfq_add_rq_rb(struct request *rq)
  {
         struct cfq_queue *cfqq = RQ_CFQQ(rq);
         struct cfq_data *cfqd = cfqq->cfqd;
-       struct request *__alias, *prev;
+       struct request *prev;
  
         cfqq->queued[rq_is_sync(rq)]++;
  
-       /*
-        * looks a little odd, but the first insert might return an alias.
-        * if that happens, put the alias on the dispatch list
-        */
-       while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
-               cfq_dispatch_insert(cfqd->queue, __alias);
+       elv_rb_add(&cfqq->sort_list, rq);
  
         if (!cfq_cfqq_on_rr(cfqq))
                 cfq_add_cfqq_rr(cfqd, cfqq);
@@ -1994,7 +2092,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  
         WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
  
-       return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
+       return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
  }
  
  /*
@@ -2381,19 +2479,14 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
                         return false;
  
                 /*
-                * If there is only one sync queue, and its think time is
-                * small, we can ignore async queue here and give the sync
+                * If there is only one sync queue,
+                * we can ignore async queue here and give the sync
                  * queue no dispatch limit. The reason is a sync queue can
                  * preempt async queue, limiting the sync queue doesn't make
                  * sense. This is useful for aiostress test.
                  */
-               if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1) {
-                       struct cfq_io_context *cic = RQ_CIC(cfqq->next_rq);
-
-                       if (sample_valid(cic->ttime_samples) &&
-                               cic->ttime_mean < cfqd->cfq_slice_idle)
-                               promote_sync = true;
-               }
+               if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1)
+                       promote_sync = true;
  
                 /*
                  * We have other queues, don't allow more IO from this one
@@ -2551,28 +2644,20 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
  }
  
  /*
- * Must always be called with the rcu_read_lock() held
+ * Call func for each cic attached to this ioc.
   */
  static void
-__call_for_each_cic(struct io_context *ioc,
-                   void (*func)(struct io_context *, struct cfq_io_context *))
+call_for_each_cic(struct io_context *ioc,
+                 void (*func)(struct io_context *, struct cfq_io_context *))
  {
         struct cfq_io_context *cic;
         struct hlist_node *n;
  
+       rcu_read_lock();
+
         hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
                 func(ioc, cic);
-}
  
-/*
- * Call func for each cic attached to this ioc.
- */
-static void
-call_for_each_cic(struct io_context *ioc,
-                 void (*func)(struct io_context *, struct cfq_io_context *))
-{
-       rcu_read_lock();
-       __call_for_each_cic(ioc, func);
         rcu_read_unlock();
  }
  
@@ -2633,7 +2718,7 @@ static void cfq_free_io_context(struct io_context *ioc)
          * should be ok to iterate over the known list, we will see all cic's
          * since no new ones are added.
          */
-       __call_for_each_cic(ioc, cic_free_func);
+       call_for_each_cic(ioc, cic_free_func);
  }
  
  static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -2682,8 +2767,11 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
         smp_wmb();
         cic->key = cfqd_dead_key(cfqd);
  
-       if (ioc->ioc_data == cic)
+       if (rcu_dereference(ioc->ioc_data) == cic) {
+               spin_lock(&ioc->lock);
                 rcu_assign_pointer(ioc->ioc_data, NULL);
+               spin_unlock(&ioc->lock);
+       }
  
         if (cic->cfqq[BLK_RW_ASYNC]) {
                 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
@@ -2889,7 +2977,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
         struct cfq_group *cfqg;
  
  retry:
-       cfqg = cfq_get_cfqg(cfqd, 1);
+       cfqg = cfq_get_cfqg(cfqd);
         cic = cfq_cic_lookup(cfqd, ioc);
         /* cic always exists here */
         cfqq = cic_to_cfqq(cic, is_sync);
@@ -3337,7 +3425,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                             cfqd->busy_queues > 1) {
                                 cfq_del_timer(cfqd, cfqq);
                                 cfq_clear_cfqq_wait_request(cfqq);
-                               __blk_run_queue(cfqd->queue, false);
+                               __blk_run_queue(cfqd->queue);
                         } else {
                                 cfq_blkiocg_update_idle_time_stats(
                                                 &cfqq->cfqg->blkg);
@@ -3352,7 +3440,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                  * this new queue is RT and the current one is BE
                  */
                 cfq_preempt_queue(cfqd, cfqq);
-               __blk_run_queue(cfqd->queue, false);
+               __blk_run_queue(cfqd->queue);
         }
  }
  
@@ -3712,7 +3800,7 @@ static void cfq_kick_queue(struct work_struct *work)
         struct request_queue *q = cfqd->queue;
  
         spin_lock_irq(q->queue_lock);
-       __blk_run_queue(cfqd->queue, false);
+       __blk_run_queue(cfqd->queue);
         spin_unlock_irq(q->queue_lock);
  }
  
@@ -3793,15 +3881,11 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
                 cfq_put_queue(cfqd->async_idle_cfqq);
  }
  
-static void cfq_cfqd_free(struct rcu_head *head)
-{
-       kfree(container_of(head, struct cfq_data, rcu));
-}
-
  static void cfq_exit_queue(struct elevator_queue *e)
  {
         struct cfq_data *cfqd = e->elevator_data;
         struct request_queue *q = cfqd->queue;
+       bool wait = false;
  
         cfq_shutdown_timer_wq(cfqd);
  
@@ -3820,7 +3904,13 @@ static void cfq_exit_queue(struct elevator_queue *e)
  
         cfq_put_async_queues(cfqd);
         cfq_release_cfq_groups(cfqd);
-       cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+
+       /*
+        * If there are groups which we could not unlink from blkcg list,
+        * wait for a rcu period for them to be freed.
+        */
+       if (cfqd->nr_blkcg_linked_grps)
+               wait = true;
  
         spin_unlock_irq(q->queue_lock);
  
@@ -3830,8 +3920,25 @@ static void cfq_exit_queue(struct elevator_queue *e)
         ida_remove(&cic_index_ida, cfqd->cic_index);
         spin_unlock(&cic_index_lock);
  
-       /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
-       call_rcu(&cfqd->rcu, cfq_cfqd_free);
+       /*
+        * Wait for cfqg->blkg->key accessors to exit their grace periods.
+        * Do this wait only if there are other unlinked groups out
+        * there. This can happen if cgroup deletion path claimed the
+        * responsibility of cleaning up a group before queue cleanup code
+        * gets to the group.
+        *
+        * Do not call synchronize_rcu() unconditionally as there are drivers
+        * which create/delete request queue hundreds of times during scan/boot
+        * and synchronize_rcu() can take significant time and slow down boot.
+        */
+       if (wait)
+               synchronize_rcu();
+
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+       /* Free up per cpu stats for root group */
+       free_percpu(cfqd->root_group.blkg.stats_cpu);
+#endif
+       kfree(cfqd);
  }
  
  static int cfq_alloc_cic_index(void)
@@ -3864,8 +3971,12 @@ static void *cfq_init_queue(struct request_queue *q)
                 return NULL;
  
         cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
-       if (!cfqd)
+       if (!cfqd) {
+               spin_lock(&cic_index_lock);
+               ida_remove(&cic_index_ida, i);
+               spin_unlock(&cic_index_lock);
                 return NULL;
+       }
  
         /*
          * Don't need take queue_lock in the routine, since we are
@@ -3887,14 +3998,29 @@ static void *cfq_init_queue(struct request_queue *q)
  
  #ifdef CONFIG_CFQ_GROUP_IOSCHED
         /*
-        * Take a reference to root group which we never drop. This is just
-        * to make sure that cfq_put_cfqg() does not try to kfree root group
+        * Set root group reference to 2. One reference will be dropped when
+        * all groups on cfqd->cfqg_list are being deleted during queue exit.
+        * Other reference will remain there as we don't want to delete this
+        * group as it is statically allocated and gets destroyed when
+        * cfq_data goes away.
          */
-       cfqg->ref = 1;
+       cfqg->ref = 2;
+
+       if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
+               kfree(cfqg);
+               kfree(cfqd);
+               return NULL;
+       }
+
         rcu_read_lock();
+
         cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
                                         (void *)cfqd, 0);
         rcu_read_unlock();
+       cfqd->nr_blkcg_linked_grps++;
+
+       /* Add group on cfqd->cfqg_list */
+       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
  #endif
         /*
          * Not strictly needed (since RB_ROOT just clears the node and we