block: reduce stack footprint of blk_recount_segments()
Jens Axboe [Mon, 23 Feb 2009 08:03:10 +0000 (09:03 +0100)]
blk_recalc_rq_segments() requires a request structure passed in, which
we don't have from blk_recount_segments(). So the latter allocates one on
the stack, using > 400 bytes of stack for that. This can cause us to spill
over one page of stack from ext4 at least:

 0)     4560     400   blk_recount_segments+0x43/0x62
 1)     4160      32   bio_phys_segments+0x1c/0x24
 2)     4128      32   blk_rq_bio_prep+0x2a/0xf9
 3)     4096      32   init_request_from_bio+0xf9/0xfe
 4)     4064     112   __make_request+0x33c/0x3f6
 5)     3952     144   generic_make_request+0x2d1/0x321
 6)     3808      64   submit_bio+0xb9/0xc3
 7)     3744      48   submit_bh+0xea/0x10e
 8)     3696     368   ext4_mb_init_cache+0x257/0xa6a [ext4]
 9)     3328     288   ext4_mb_regular_allocator+0x421/0xcd9 [ext4]
10)     3040     160   ext4_mb_new_blocks+0x211/0x4b4 [ext4]
11)     2880     336   ext4_ext_get_blocks+0xb61/0xd45 [ext4]
12)     2544      96   ext4_get_blocks_wrap+0xf2/0x200 [ext4]
13)     2448      80   ext4_da_get_block_write+0x6e/0x16b [ext4]
14)     2368     352   mpage_da_map_blocks+0x7e/0x4b3 [ext4]
15)     2016     352   ext4_da_writepages+0x2ce/0x43c [ext4]
16)     1664      32   do_writepages+0x2d/0x3c
17)     1632     144   __writeback_single_inode+0x162/0x2cd
18)     1488      96   generic_sync_sb_inodes+0x1e3/0x32b
19)     1392      16   sync_sb_inodes+0xe/0x10
20)     1376      48   writeback_inodes+0x69/0xb3
21)     1328     208   balance_dirty_pages_ratelimited_nr+0x187/0x2f9
22)     1120     224   generic_file_buffered_write+0x1d4/0x2c4
23)      896     176   __generic_file_aio_write_nolock+0x35f/0x393
24)      720      80   generic_file_aio_write+0x6c/0xc8
25)      640      80   ext4_file_write+0xa9/0x137 [ext4]
26)      560     320   do_sync_write+0xf0/0x137
27)      240      48   vfs_write+0xb3/0x13c
28)      192      64   sys_write+0x4c/0x74
29)      128     128   system_call_fastpath+0x16/0x1b

Split the segment counting out into a __blk_recalc_rq_segments() helper
to avoid allocating an onstack request just for checking the physical
segment count.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>

block/blk-merge.c
include/linux/blkdev.h

index b92f5b0..a104593 100644 (file)
@@ -38,72 +38,84 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect)
        }
 }
 
-void blk_recalc_rq_segments(struct request *rq)
+static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
+                                            struct bio *bio,
+                                            unsigned int *seg_size_ptr)
 {
-       int nr_phys_segs;
        unsigned int phys_size;
        struct bio_vec *bv, *bvprv = NULL;
-       int seg_size;
-       int cluster;
-       struct req_iterator iter;
-       int high, highprv = 1;
-       struct request_queue *q = rq->q;
+       int cluster, i, high, highprv = 1;
+       unsigned int seg_size, nr_phys_segs;
+       struct bio *fbio;
 
-       if (!rq->bio)
-               return;
+       if (!bio)
+               return 0;
 
+       fbio = bio;
        cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
        seg_size = 0;
        phys_size = nr_phys_segs = 0;
-       rq_for_each_segment(bv, rq, iter) {
-               /*
-                * the trick here is making sure that a high page is never
-                * considered part of another segment, since that might
-                * change with the bounce page.
-                */
-               high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
-               if (high || highprv)
-                       goto new_segment;
-               if (cluster) {
-                       if (seg_size + bv->bv_len > q->max_segment_size)
-                               goto new_segment;
-                       if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
-                               goto new_segment;
-                       if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
+       for_each_bio(bio) {
+               bio_for_each_segment(bv, bio, i) {
+                       /*
+                        * the trick here is making sure that a high page is
+                        * never considered part of another segment, since that
+                        * might change with the bounce page.
+                        */
+                       high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
+                       if (high || highprv)
                                goto new_segment;
+                       if (cluster) {
+                               if (seg_size + bv->bv_len > q->max_segment_size)
+                                       goto new_segment;
+                               if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
+                                       goto new_segment;
+                               if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
+                                       goto new_segment;
+
+                               seg_size += bv->bv_len;
+                               bvprv = bv;
+                               continue;
+                       }
+new_segment:
+                       if (nr_phys_segs == 1 && seg_size >
+                           fbio->bi_seg_front_size)
+                               fbio->bi_seg_front_size = seg_size;
 
-                       seg_size += bv->bv_len;
+                       nr_phys_segs++;
                        bvprv = bv;
-                       continue;
+                       seg_size = bv->bv_len;
+                       highprv = high;
                }
-new_segment:
-               if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
-                       rq->bio->bi_seg_front_size = seg_size;
-
-               nr_phys_segs++;
-               bvprv = bv;
-               seg_size = bv->bv_len;
-               highprv = high;
        }
 
-       if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
+       if (seg_size_ptr)
+               *seg_size_ptr = seg_size;
+
+       return nr_phys_segs;
+}
+
+void blk_recalc_rq_segments(struct request *rq)
+{
+       unsigned int seg_size = 0, phys_segs;
+
+       phys_segs = __blk_recalc_rq_segments(rq->q, rq->bio, &seg_size);
+
+       if (phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
                rq->bio->bi_seg_front_size = seg_size;
        if (seg_size > rq->biotail->bi_seg_back_size)
                rq->biotail->bi_seg_back_size = seg_size;
 
-       rq->nr_phys_segments = nr_phys_segs;
+       rq->nr_phys_segments = phys_segs;
 }
 
 void blk_recount_segments(struct request_queue *q, struct bio *bio)
 {
-       struct request rq;
        struct bio *nxt = bio->bi_next;
-       rq.q = q;
-       rq.bio = rq.biotail = bio;
+
        bio->bi_next = NULL;
-       blk_recalc_rq_segments(&rq);
+       bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, NULL);
        bio->bi_next = nxt;
-       bio->bi_phys_segments = rq.nr_phys_segments;
        bio->bi_flags |= (1 << BIO_SEG_VALID);
 }
 EXPORT_SYMBOL(blk_recount_segments);
index dcaa0fd..465d6ba 100644 (file)
@@ -708,6 +708,8 @@ struct req_iterator {
 };
 
 /* This should not be used directly - use rq_for_each_segment */
+#define for_each_bio(_bio)             \
+       for (; _bio; _bio = _bio->bi_next)
 #define __rq_for_each_bio(_bio, rq)    \
        if ((rq->bio))                  \
                for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)