Merge branch 'for-linus' of ../linux-2.6-block into block-for-2.6.39/core
Tejun Heo [Fri, 4 Mar 2011 18:09:02 +0000 (19:09 +0100)]
This merge creates two sets of conflicts.  One is a set of simple
context conflicts caused by removal of throtl_schedule_delayed_work()
in for-linus and removal of throtl_shutdown_timer_wq() in
for-2.6.39/core.

The other is caused by commit 255bb490c8 (block: blk-flush shouldn't
call directly into q->request_fn() __blk_run_queue()) in for-linus
clashing with the FLUSH reimplementation in for-2.6.39/core.  The
conflict isn't trivial but the resolution is straightforward.

* __blk_run_queue() calls in flush_end_io() and flush_data_end_io()
  should be made with @force_kblockd set to %true.

* elv_insert() in blk_kick_flush() should use
  %ELEVATOR_INSERT_REQUEUE.

Both changes are to avoid invoking ->request_fn() directly from the
request completion path and closely match the changes in commit
255bb490c8.

Signed-off-by: Tejun Heo <tj@kernel.org>

1  2 
block/blk-core.c
block/blk-flush.c
block/blk-throttle.c
block/cfq-iosched.c
block/elevator.c
block/genhd.c
drivers/block/loop.c
include/linux/blkdev.h

Simple merge
  
  /* FLUSH/FUA sequences */
  enum {
 -      QUEUE_FSEQ_STARTED      = (1 << 0), /* flushing in progress */
 -      QUEUE_FSEQ_PREFLUSH     = (1 << 1), /* pre-flushing in progress */
 -      QUEUE_FSEQ_DATA         = (1 << 2), /* data write in progress */
 -      QUEUE_FSEQ_POSTFLUSH    = (1 << 3), /* post-flushing in progress */
 -      QUEUE_FSEQ_DONE         = (1 << 4),
 +      REQ_FSEQ_PREFLUSH       = (1 << 0), /* pre-flushing in progress */
 +      REQ_FSEQ_DATA           = (1 << 1), /* data write in progress */
 +      REQ_FSEQ_POSTFLUSH      = (1 << 2), /* post-flushing in progress */
 +      REQ_FSEQ_DONE           = (1 << 3),
 +
 +      REQ_FSEQ_ACTIONS        = REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
 +                                REQ_FSEQ_POSTFLUSH,
 +
 +      /*
 +       * If flush has been pending longer than the following timeout,
 +       * it's issued even if flush_data requests are still in flight.
 +       */
 +      FLUSH_PENDING_TIMEOUT   = 5 * HZ,
  };
  
 -static struct request *queue_next_fseq(struct request_queue *q);
 +static bool blk_kick_flush(struct request_queue *q);
  
 -unsigned blk_flush_cur_seq(struct request_queue *q)
 +static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
  {
 -      if (!q->flush_seq)
 -              return 0;
 -      return 1 << ffz(q->flush_seq);
 +      unsigned int policy = 0;
 +
 +      if (fflags & REQ_FLUSH) {
 +              if (rq->cmd_flags & REQ_FLUSH)
 +                      policy |= REQ_FSEQ_PREFLUSH;
 +              if (blk_rq_sectors(rq))
 +                      policy |= REQ_FSEQ_DATA;
 +              if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
 +                      policy |= REQ_FSEQ_POSTFLUSH;
 +      }
 +      return policy;
  }
  
 -static struct request *blk_flush_complete_seq(struct request_queue *q,
 -                                            unsigned seq, int error)
 +static unsigned int blk_flush_cur_seq(struct request *rq)
  {
 -      struct request *next_rq = NULL;
 -
 -      if (error && !q->flush_err)
 -              q->flush_err = error;
 -
 -      BUG_ON(q->flush_seq & seq);
 -      q->flush_seq |= seq;
 -
 -      if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
 -              /* not complete yet, queue the next flush sequence */
 -              next_rq = queue_next_fseq(q);
 -      } else {
 -              /* complete this flush request */
 -              __blk_end_request_all(q->orig_flush_rq, q->flush_err);
 -              q->orig_flush_rq = NULL;
 -              q->flush_seq = 0;
 -
 -              /* dispatch the next flush if there's one */
 -              if (!list_empty(&q->pending_flushes)) {
 -                      next_rq = list_entry_rq(q->pending_flushes.next);
 -                      list_move(&next_rq->queuelist, &q->queue_head);
 -              }
 +      return 1 << ffz(rq->flush.seq);
 +}
 +
 +static void blk_flush_restore_request(struct request *rq)
 +{
 +      /*
 +       * After flush data completion, @rq->bio is %NULL but we need to
 +       * complete the bio again.  @rq->biotail is guaranteed to equal the
 +       * original @rq->bio.  Restore it.
 +       */
 +      rq->bio = rq->biotail;
 +
 +      /* make @rq a normal request */
 +      rq->cmd_flags &= ~REQ_FLUSH_SEQ;
 +      rq->end_io = NULL;
 +}
 +
 +/**
 + * blk_flush_complete_seq - complete flush sequence
 + * @rq: FLUSH/FUA request being sequenced
 + * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
 + * @error: whether an error occurred
 + *
 + * @rq just completed @seq part of its flush sequence, record the
 + * completion and trigger the next step.
 + *
 + * CONTEXT:
 + * spin_lock_irq(q->queue_lock)
 + *
 + * RETURNS:
 + * %true if requests were added to the dispatch queue, %false otherwise.
 + */
 +static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 +                                 int error)
 +{
 +      struct request_queue *q = rq->q;
 +      struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
 +      bool queued = false;
 +
 +      BUG_ON(rq->flush.seq & seq);
 +      rq->flush.seq |= seq;
 +
 +      if (likely(!error))
 +              seq = blk_flush_cur_seq(rq);
 +      else
 +              seq = REQ_FSEQ_DONE;
 +
 +      switch (seq) {
 +      case REQ_FSEQ_PREFLUSH:
 +      case REQ_FSEQ_POSTFLUSH:
 +              /* queue for flush */
 +              if (list_empty(pending))
 +                      q->flush_pending_since = jiffies;
 +              list_move_tail(&rq->flush.list, pending);
 +              break;
 +
 +      case REQ_FSEQ_DATA:
 +              list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
 +              list_add(&rq->queuelist, &q->queue_head);
 +              queued = true;
 +              break;
 +
 +      case REQ_FSEQ_DONE:
 +              /*
 +               * @rq was previously adjusted by blk_flush_issue() for
 +               * flush sequencing and may already have gone through the
 +               * flush data request completion path.  Restore @rq for
 +               * normal completion and end it.
 +               */
 +              BUG_ON(!list_empty(&rq->queuelist));
 +              list_del_init(&rq->flush.list);
 +              blk_flush_restore_request(rq);
 +              __blk_end_request_all(rq, error);
 +              break;
 +
 +      default:
 +              BUG();
        }
 -      return next_rq;
 +
 +      return blk_kick_flush(q) | queued;
  }
  
 -static void blk_flush_complete_seq_end_io(struct request_queue *q,
 -                                        unsigned seq, int error)
 +static void flush_end_io(struct request *flush_rq, int error)
  {
 +      struct request_queue *q = flush_rq->q;
 +      struct list_head *running = &q->flush_queue[q->flush_running_idx];
        bool was_empty = elv_queue_empty(q);
 -      struct request *next_rq;
 +      bool queued = false;
 +      struct request *rq, *n;
 +
 +      BUG_ON(q->flush_pending_idx == q->flush_running_idx);
 +
 +      /* account completion of the flush request */
 +      q->flush_running_idx ^= 1;
 +      elv_completed_request(q, flush_rq);
  
 -      next_rq = blk_flush_complete_seq(q, seq, error);
 +      /* and push the waiting requests to the next stage */
 +      list_for_each_entry_safe(rq, n, running, flush.list) {
 +              unsigned int seq = blk_flush_cur_seq(rq);
 +
 +              BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
 +              queued |= blk_flush_complete_seq(rq, seq, error);
 +      }
  
-       /* after populating an empty queue, kick it to avoid stall */
+       /*
+        * Moving a request silently to empty queue_head may stall the
+        * queue.  Kick the queue in those cases.  This function is called
+        * from request completion path and calling directly into
+        * request_fn may confuse the driver.  Always use kblockd.
+        */
 -      if (was_empty && next_rq)
 +      if (queued && was_empty)
-               __blk_run_queue(q);
+               __blk_run_queue(q, true);
  }
  
 -static void pre_flush_end_io(struct request *rq, int error)
 +/**
 + * blk_kick_flush - consider issuing flush request
 + * @q: request_queue being kicked
 + *
 + * Flush related states of @q have changed, consider issuing flush request.
 + * Please read the comment at the top of this file for more info.
 + *
 + * CONTEXT:
 + * spin_lock_irq(q->queue_lock)
 + *
 + * RETURNS:
 + * %true if flush was issued, %false otherwise.
 + */
 +static bool blk_kick_flush(struct request_queue *q)
  {
 -      elv_completed_request(rq->q, rq);
 -      blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
 +      struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
 +      struct request *first_rq =
 +              list_first_entry(pending, struct request, flush.list);
 +
 +      /* C1 described at the top of this file */
 +      if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending))
 +              return false;
 +
 +      /* C2 and C3 */
 +      if (!list_empty(&q->flush_data_in_flight) &&
 +          time_before(jiffies,
 +                      q->flush_pending_since + FLUSH_PENDING_TIMEOUT))
 +              return false;
 +
 +      /*
 +       * Issue flush and toggle pending_idx.  This makes pending_idx
 +       * different from running_idx, which means flush is in flight.
 +       */
 +      blk_rq_init(q, &q->flush_rq);
 +      q->flush_rq.cmd_type = REQ_TYPE_FS;
 +      q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
 +      q->flush_rq.rq_disk = first_rq->rq_disk;
 +      q->flush_rq.end_io = flush_end_io;
 +
 +      q->flush_pending_idx ^= 1;
-       elv_insert(q, &q->flush_rq, ELEVATOR_INSERT_FRONT);
++      elv_insert(q, &q->flush_rq, ELEVATOR_INSERT_REQUEUE);
 +      return true;
  }
  
  static void flush_data_end_io(struct request *rq, int error)
  {
 -      elv_completed_request(rq->q, rq);
 -      blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
 -}
 +      struct request_queue *q = rq->q;
 +      bool was_empty = elv_queue_empty(q);
  
-       /* after populating an empty queue, kick it to avoid stall */
 -static void post_flush_end_io(struct request *rq, int error)
 -{
 -      elv_completed_request(rq->q, rq);
 -      blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
++      /*
++       * After populating an empty queue, kick it to avoid stall.  Read
++       * the comment in flush_end_io().
++       */
 +      if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
-               __blk_run_queue(q);
++              __blk_run_queue(q, true);
  }
  
 -static void init_flush_request(struct request *rq, struct gendisk *disk)
 +/**
 + * blk_insert_flush - insert a new FLUSH/FUA request
 + * @rq: request to insert
 + *
 + * To be called from elv_insert() for %ELEVATOR_INSERT_FLUSH insertions.
 + * @rq is being submitted.  Analyze what needs to be done and put it on the
 + * right queue.
 + *
 + * CONTEXT:
 + * spin_lock_irq(q->queue_lock)
 + */
 +void blk_insert_flush(struct request *rq)
  {
 -      rq->cmd_type = REQ_TYPE_FS;
 -      rq->cmd_flags = WRITE_FLUSH;
 -      rq->rq_disk = disk;
 -}
 +      struct request_queue *q = rq->q;
 +      unsigned int fflags = q->flush_flags;   /* may change, cache */
 +      unsigned int policy = blk_flush_policy(fflags, rq);
  
 -static struct request *queue_next_fseq(struct request_queue *q)
 -{
 -      struct request *orig_rq = q->orig_flush_rq;
 -      struct request *rq = &q->flush_rq;
 +      BUG_ON(rq->end_io);
 +      BUG_ON(!rq->bio || rq->bio != rq->biotail);
  
 -      blk_rq_init(q, rq);
 +      /*
 +       * @policy now records what operations need to be done.  Adjust
 +       * REQ_FLUSH and FUA for the driver.
 +       */
 +      rq->cmd_flags &= ~REQ_FLUSH;
 +      if (!(fflags & REQ_FUA))
 +              rq->cmd_flags &= ~REQ_FUA;
  
 -      switch (blk_flush_cur_seq(q)) {
 -      case QUEUE_FSEQ_PREFLUSH:
 -              init_flush_request(rq, orig_rq->rq_disk);
 -              rq->end_io = pre_flush_end_io;
 -              break;
 -      case QUEUE_FSEQ_DATA:
 -              init_request_from_bio(rq, orig_rq->bio);
 -              /*
 -               * orig_rq->rq_disk may be different from
 -               * bio->bi_bdev->bd_disk if orig_rq got here through
 -               * remapping drivers.  Make sure rq->rq_disk points
 -               * to the same one as orig_rq.
 -               */
 -              rq->rq_disk = orig_rq->rq_disk;
 -              rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
 -              rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
 -              rq->end_io = flush_data_end_io;
 -              break;
 -      case QUEUE_FSEQ_POSTFLUSH:
 -              init_flush_request(rq, orig_rq->rq_disk);
 -              rq->end_io = post_flush_end_io;
 -              break;
 -      default:
 -              BUG();
 +      /*
 +       * If there's data but flush is not necessary, the request can be
 +       * processed directly without going through flush machinery.  Queue
 +       * for normal execution.
 +       */
 +      if ((policy & REQ_FSEQ_DATA) &&
 +          !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
 +              list_add(&rq->queuelist, &q->queue_head);
 +              return;
        }
  
 -      elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
 -      return rq;
 +      /*
 +       * @rq should go through flush machinery.  Mark it part of flush
 +       * sequence and submit for further processing.
 +       */
 +      memset(&rq->flush, 0, sizeof(rq->flush));
 +      INIT_LIST_HEAD(&rq->flush.list);
 +      rq->cmd_flags |= REQ_FLUSH_SEQ;
 +      rq->end_io = flush_data_end_io;
 +
 +      blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
  }
  
 -struct request *blk_do_flush(struct request_queue *q, struct request *rq)
 +/**
 + * blk_abort_flushes - @q is being aborted, abort flush requests
 + * @q: request_queue being aborted
 + *
 + * To be called from elv_abort_queue().  @q is being aborted.  Prepare all
 + * FLUSH/FUA requests for abortion.
 + *
 + * CONTEXT:
 + * spin_lock_irq(q->queue_lock)
 + */
 +void blk_abort_flushes(struct request_queue *q)
  {
 -      unsigned int fflags = q->flush_flags; /* may change, cache it */
 -      bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
 -      bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
 -      bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
 -      unsigned skip = 0;
 +      struct request *rq, *n;
 +      int i;
  
        /*
 -       * Special case.  If there's data but flush is not necessary,
 -       * the request can be issued directly.
 -       *
 -       * Flush w/o data should be able to be issued directly too but
 -       * currently some drivers assume that rq->bio contains
 -       * non-zero data if it isn't NULL and empty FLUSH requests
 -       * getting here usually have bio's without data.
 +       * Requests in flight for data are already owned by the dispatch
 +       * queue or the device driver.  Just restore for normal completion.
         */
 -      if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
 -              rq->cmd_flags &= ~REQ_FLUSH;
 -              if (!has_fua)
 -                      rq->cmd_flags &= ~REQ_FUA;
 -              return rq;
 +      list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
 +              list_del_init(&rq->flush.list);
 +              blk_flush_restore_request(rq);
        }
  
        /*
@@@ -962,10 -965,10 +965,10 @@@ static void throtl_update_blkio_group_w
        smp_mb__before_atomic_inc();
        atomic_inc(&td->limits_changed);
        smp_mb__after_atomic_inc();
-       throtl_schedule_delayed_work(td->queue, 0);
+       throtl_schedule_delayed_work(td, 0);
  }
  
 -void throtl_shutdown_timer_wq(struct request_queue *q)
 +static void throtl_shutdown_wq(struct request_queue *q)
  {
        struct throtl_data *td = q->td;
  
Simple merge
Simple merge
diff --cc block/genhd.c
Simple merge
Simple merge
@@@ -1143,7 -1135,7 +1142,6 @@@ static inline uint64_t rq_io_start_time
  extern int blk_throtl_init(struct request_queue *q);
  extern void blk_throtl_exit(struct request_queue *q);
  extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
- extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay);
 -extern void throtl_shutdown_timer_wq(struct request_queue *q);
  #else /* CONFIG_BLK_DEV_THROTTLING */
  static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
  {
  
  static inline int blk_throtl_init(struct request_queue *q) { return 0; }
  static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
- static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {}
 -static inline void throtl_shutdown_timer_wq(struct request_queue *q) {}
  #endif /* CONFIG_BLK_DEV_THROTTLING */
  
  #define MODULE_ALIAS_BLOCKDEV(major,minor) \