block: add sysfs file for controlling io stats accounting
Jens Axboe [Fri, 23 Jan 2009 09:54:44 +0000 (10:54 +0100)]
This allows us to turn off disk stat accounting completely, for the cases
where the 0.5-1% reduction in system time is important.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

block/blk-core.c
block/blk-sysfs.c
include/linux/blkdev.h

index ae75c04..ca69f3d 100644 (file)
@@ -64,11 +64,12 @@ static struct workqueue_struct *kblockd_workqueue;
 
 static void drive_stat_acct(struct request *rq, int new_io)
 {
+       struct gendisk *disk = rq->rq_disk;
        struct hd_struct *part;
        int rw = rq_data_dir(rq);
        int cpu;
 
-       if (!blk_fs_request(rq) || !rq->rq_disk)
+       if (!blk_fs_request(rq) || !disk || !blk_queue_io_stat(disk->queue))
                return;
 
        cpu = part_stat_lock();
@@ -599,8 +600,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
        q->request_fn           = rfn;
        q->prep_rq_fn           = NULL;
        q->unplug_fn            = generic_unplug_device;
-       q->queue_flags          = (1 << QUEUE_FLAG_CLUSTER |
-                                  1 << QUEUE_FLAG_STACKABLE);
+       q->queue_flags          = QUEUE_FLAG_DEFAULT;
        q->queue_lock           = lock;
 
        blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
@@ -1663,6 +1663,55 @@ void blkdev_dequeue_request(struct request *req)
 }
 EXPORT_SYMBOL(blkdev_dequeue_request);
 
+static void blk_account_io_completion(struct request *req, unsigned int bytes)
+{
+       struct gendisk *disk = req->rq_disk;
+
+       if (!disk || !blk_queue_io_stat(disk->queue))
+               return;
+
+       if (blk_fs_request(req)) {
+               const int rw = rq_data_dir(req);
+               struct hd_struct *part;
+               int cpu;
+
+               cpu = part_stat_lock();
+               part = disk_map_sector_rcu(req->rq_disk, req->sector);
+               part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+               part_stat_unlock();
+       }
+}
+
+static void blk_account_io_done(struct request *req)
+{
+       struct gendisk *disk = req->rq_disk;
+
+       if (!disk || !blk_queue_io_stat(disk->queue))
+               return;
+
+       /*
+        * Account IO completion.  bar_rq isn't accounted as a normal
+        * IO on queueing nor completion.  Accounting the containing
+        * request is enough.
+        */
+       if (blk_fs_request(req) && req != &req->q->bar_rq) {
+               unsigned long duration = jiffies - req->start_time;
+               const int rw = rq_data_dir(req);
+               struct hd_struct *part;
+               int cpu;
+
+               cpu = part_stat_lock();
+               part = disk_map_sector_rcu(disk, req->sector);
+
+               part_stat_inc(cpu, part, ios[rw]);
+               part_stat_add(cpu, part, ticks[rw], duration);
+               part_round_stats(cpu, part);
+               part_dec_in_flight(part);
+
+               part_stat_unlock();
+       }
+}
+
 /**
  * __end_that_request_first - end I/O on a request
  * @req:      the request being processed
@@ -1698,16 +1747,7 @@ static int __end_that_request_first(struct request *req, int error,
                                (unsigned long long)req->sector);
        }
 
-       if (blk_fs_request(req) && req->rq_disk) {
-               const int rw = rq_data_dir(req);
-               struct hd_struct *part;
-               int cpu;
-
-               cpu = part_stat_lock();
-               part = disk_map_sector_rcu(req->rq_disk, req->sector);
-               part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
-               part_stat_unlock();
-       }
+       blk_account_io_completion(req, nr_bytes);
 
        total_bytes = bio_nbytes = 0;
        while ((bio = req->bio) != NULL) {
@@ -1787,8 +1827,6 @@ static int __end_that_request_first(struct request *req, int error,
  */
 static void end_that_request_last(struct request *req, int error)
 {
-       struct gendisk *disk = req->rq_disk;
-
        if (blk_rq_tagged(req))
                blk_queue_end_tag(req->q, req);
 
@@ -1800,27 +1838,7 @@ static void end_that_request_last(struct request *req, int error)
 
        blk_delete_timer(req);
 
-       /*
-        * Account IO completion.  bar_rq isn't accounted as a normal
-        * IO on queueing nor completion.  Accounting the containing
-        * request is enough.
-        */
-       if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
-               unsigned long duration = jiffies - req->start_time;
-               const int rw = rq_data_dir(req);
-               struct hd_struct *part;
-               int cpu;
-
-               cpu = part_stat_lock();
-               part = disk_map_sector_rcu(disk, req->sector);
-
-               part_stat_inc(cpu, part, ios[rw]);
-               part_stat_add(cpu, part, ticks[rw], duration);
-               part_round_stats(cpu, part);
-               part_dec_in_flight(part);
-
-               part_stat_unlock();
-       }
+       blk_account_io_done(req);
 
        if (req->end_io)
                req->end_io(req, error);
index b538ab8..e29ddfc 100644 (file)
@@ -197,6 +197,27 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
        return ret;
 }
 
+static ssize_t queue_iostats_show(struct request_queue *q, char *page)
+{
+       return queue_var_show(blk_queue_io_stat(q), page);
+}
+
+static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
+                                  size_t count)
+{
+       unsigned long stats;
+       ssize_t ret = queue_var_store(&stats, page, count);
+
+       spin_lock_irq(q->queue_lock);
+       if (stats)
+               queue_flag_set(QUEUE_FLAG_IO_STAT, q);
+       else
+               queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
+       spin_unlock_irq(q->queue_lock);
+
+       return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
        .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
        .show = queue_requests_show,
@@ -249,6 +270,12 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = {
        .store = queue_rq_affinity_store,
 };
 
+static struct queue_sysfs_entry queue_iostats_entry = {
+       .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+       .show = queue_iostats_show,
+       .store = queue_iostats_store,
+};
+
 static struct attribute *default_attrs[] = {
        &queue_requests_entry.attr,
        &queue_ra_entry.attr,
@@ -259,6 +286,7 @@ static struct attribute *default_attrs[] = {
        &queue_nonrot_entry.attr,
        &queue_nomerges_entry.attr,
        &queue_rq_affinity_entry.attr,
+       &queue_iostats_entry.attr,
        NULL,
 };
 
index 75426e4..d08c4b8 100644 (file)
@@ -451,6 +451,11 @@ struct request_queue
 #define QUEUE_FLAG_STACKABLE   13      /* supports request stacking */
 #define QUEUE_FLAG_NONROT      14      /* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
+#define QUEUE_FLAG_IO_STAT     15      /* do IO stats */
+
+#define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |            \
+                                (1 << QUEUE_FLAG_CLUSTER) |            \
+                                 1 << QUEUE_FLAG_STACKABLE)
 
 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -567,6 +572,7 @@ enum {
 #define blk_queue_stopped(q)   test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)  test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)    test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
+#define blk_queue_io_stat(q)   test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_flushing(q)  ((q)->ordseq)
 #define blk_queue_stackable(q) \
        test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)