Btrfs: don't allocate a seperate csums array for direct reads
Josef Bacik [Fri, 3 Aug 2012 20:49:19 +0000 (16:49 -0400)]
We've been allocating a big array for csums instead of storing them in the
io_tree like we do for buffered reads because previously we were locking the
entire range, so we didn't have an extent state for each sector of the
range.  But now that we do the range locking as we map the buffers we can
limit the mapping lenght to sectorsize and use the private part of the
io_tree for our csums.  This allows us to avoid an extra memory allocation
for direct reads which could incur latency.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>

fs/btrfs/ctree.h
fs/btrfs/file-item.c
fs/btrfs/inode.c

index adb1cd7..3481963 100644 (file)
@@ -3192,7 +3192,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
                          struct bio *bio, u32 *dst);
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-                             struct bio *bio, u64 logical_offset, u32 *dst);
+                             struct bio *bio, u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 objectid, u64 pos,
index b45b9de..857d93c 100644 (file)
@@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 }
 
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-                             struct bio *bio, u64 offset, u32 *dst)
+                             struct bio *bio, u64 offset)
 {
-       return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
+       return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
index 0918244..2d65c52 100644 (file)
@@ -5847,15 +5847,18 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        int unlock_bits = EXTENT_LOCKED;
        int ret;
 
-       lockstart = start;
-       lockend = start + len - 1;
        if (create) {
                ret = btrfs_delalloc_reserve_space(inode, len);
                if (ret)
                        return ret;
                unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+       } else {
+               len = min_t(u64, len, root->sectorsize);
        }
 
+       lockstart = start;
+       lockend = start + len - 1;
+
        /*
         * If this errors out it's because we couldn't invalidate pagecache for
         * this range and we need to fallback to buffered.
@@ -6015,7 +6018,6 @@ struct btrfs_dio_private {
        u64 logical_offset;
        u64 disk_bytenr;
        u64 bytes;
-       u32 *csums;
        void *private;
 
        /* number of bios pending for this dio */
@@ -6035,7 +6037,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
        struct inode *inode = dip->inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 start;
-       u32 *private = dip->csums;
 
        start = dip->logical_offset;
        do {
@@ -6043,8 +6044,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                        struct page *page = bvec->bv_page;
                        char *kaddr;
                        u32 csum = ~(u32)0;
+                       u64 private = ~(u32)0;
                        unsigned long flags;
 
+                       if (get_state_private(&BTRFS_I(inode)->io_tree,
+                                             start, &private))
+                               goto failed;
                        local_irq_save(flags);
                        kaddr = kmap_atomic(page);
                        csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -6054,18 +6059,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                        local_irq_restore(flags);
 
                        flush_dcache_page(bvec->bv_page);
-                       if (csum != *private) {
+                       if (csum != private) {
+failed:
                                printk(KERN_ERR "btrfs csum failed ino %llu off"
                                      " %llu csum %u private %u\n",
                                      (unsigned long long)btrfs_ino(inode),
                                      (unsigned long long)start,
-                                     csum, *private);
+                                     csum, (unsigned)private);
                                err = -EIO;
                        }
                }
 
                start += bvec->bv_len;
-               private++;
                bvec++;
        } while (bvec <= bvec_end);
 
@@ -6073,7 +6078,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                      dip->logical_offset + dip->bytes - 1);
        bio->bi_private = dip->private;
 
-       kfree(dip->csums);
        kfree(dip);
 
        /* If we had a csum failure make sure to clear the uptodate flag */
@@ -6179,7 +6183,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                         int rw, u64 file_offset, int skip_sum,
-                                        u32 *csums, int async_submit)
+                                        int async_submit)
 {
        int write = rw & REQ_WRITE;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6212,8 +6216,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                if (ret)
                        goto err;
        } else if (!skip_sum) {
-               ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
-                                         file_offset, csums);
+               ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
                if (ret)
                        goto err;
        }
@@ -6239,10 +6242,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        u64 submit_len = 0;
        u64 map_length;
        int nr_pages = 0;
-       u32 *csums = dip->csums;
        int ret = 0;
        int async_submit = 0;
-       int write = rw & REQ_WRITE;
 
        map_length = orig_bio->bi_size;
        ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6278,16 +6279,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                        atomic_inc(&dip->pending_bios);
                        ret = __btrfs_submit_dio_bio(bio, inode, rw,
                                                     file_offset, skip_sum,
-                                                    csums, async_submit);
+                                                    async_submit);
                        if (ret) {
                                bio_put(bio);
                                atomic_dec(&dip->pending_bios);
                                goto out_err;
                        }
 
-                       /* Write's use the ordered csums */
-                       if (!write && !skip_sum)
-                               csums = csums + nr_pages;
                        start_sector += submit_len >> 9;
                        file_offset += submit_len;
 
@@ -6317,7 +6315,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 
 submit:
        ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-                                    csums, async_submit);
+                                    async_submit);
        if (!ret)
                return 0;
 
@@ -6353,17 +6351,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
                ret = -ENOMEM;
                goto free_ordered;
        }
-       dip->csums = NULL;
-
-       /* Write's use the ordered csum stuff, so we don't need dip->csums */
-       if (!write && !skip_sum) {
-               dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
-               if (!dip->csums) {
-                       kfree(dip);
-                       ret = -ENOMEM;
-                       goto free_ordered;
-               }
-       }
 
        dip->private = bio->bi_private;
        dip->inode = inode;