Btrfs: move data checksumming into a dedicated tree
Chris Mason [Mon, 8 Dec 2008 21:58:54 +0000 (16:58 -0500)]
Btrfs stores checksums for each data block.  Until now, they have
been stored in the subvolume trees, indexed by the inode that is
referencing the data block.  This means that when we read the inode,
we've probably read in at least some checksums as well.

But, this has a few problems:

* The checksums are indexed by logical offset in the file.  When
compression is on, this means we have to do the expensive checksumming
on the uncompressed data.  It would be faster if we could checksum
the compressed data instead.

* If we implement encryption, we'll be checksumming the plain text and
storing that on disk.  This is significantly less secure.

* For either compression or encryption, we have to get the plain text
back before we can verify the checksum as correct.  This makes the raid
layer balancing and extent moving much more expensive.

* It makes the front end caching code more complex, as we have to touch
the subvolume and inodes as we cache extents.

* There is potentially one copy of the checksum in each subvolume
referencing an extent.

The solution used here is to store the extent checksums in a dedicated
tree.  This allows us to index the checksums by physical extent
start and length.  It means:

* The checksum is against the data stored on disk, after any compression
or encryption is done.

* The checksum is stored in a central location, and can be verified without
following back references, or reading inodes.

This makes compression significantly faster by reducing the amount of
data that needs to be checksummed.  It will also allow much faster
raid management code in general.

The checksums are indexed by a key with a fixed objectid (a magic value
in ctree.h) and offset set to the starting byte of the extent.  This
allows us to copy the checksum items into the fsync log tree directly (or
any other tree), without having to invent a second format for them.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/file-item.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/tree-log.c
fs/btrfs/volumes.c

index 4febe2e..ad72741 100644 (file)
@@ -69,11 +69,27 @@ struct compressed_bio {
 
        /* IO errors */
        int errors;
+       int mirror_num;
 
        /* for reads, this is the bio we are copying the data into */
        struct bio *orig_bio;
+
+       /*
+        * the start of a variable length array of checksums only
+        * used by reads
+        */
+       u32 sums;
 };
 
+static inline int compressed_bio_size(struct btrfs_root *root,
+                                     unsigned long disk_size)
+{
+       u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
+       return sizeof(struct compressed_bio) +
+               ((disk_size + root->sectorsize - 1) / root->sectorsize) *
+               csum_size;
+}
+
 static struct bio *compressed_bio_alloc(struct block_device *bdev,
                                        u64 first_byte, gfp_t gfp_flags)
 {
@@ -96,6 +112,47 @@ static struct bio *compressed_bio_alloc(struct block_device *bdev,
        return bio;
 }
 
+static int check_compressed_csum(struct inode *inode,
+                                struct compressed_bio *cb,
+                                u64 disk_start)
+{
+       int ret;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct page *page;
+       unsigned long i;
+       char *kaddr;
+       u32 csum;
+       u32 *cb_sum = &cb->sums;
+
+       if (btrfs_test_opt(root, NODATASUM) ||
+           btrfs_test_flag(inode, NODATASUM))
+               return 0;
+
+       for (i = 0; i < cb->nr_pages; i++) {
+               page = cb->compressed_pages[i];
+               csum = ~(u32)0;
+
+               kaddr = kmap_atomic(page, KM_USER0);
+               csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
+               btrfs_csum_final(csum, (char *)&csum);
+               kunmap_atomic(kaddr, KM_USER0);
+
+               if (csum != *cb_sum) {
+                       printk("btrfs csum failed ino %lu extent %llu csum %u "
+                              "wanted %u mirror %d\n", inode->i_ino,
+                              (unsigned long long)disk_start,
+                              csum, *cb_sum, cb->mirror_num);
+                       ret = -EIO;
+                       goto fail;
+               }
+               cb_sum++;
+
+       }
+       ret = 0;
+fail:
+       return ret;
+}
+
 /* when we finish reading compressed pages from the disk, we
  * decompress them and then run the bio end_io routines on the
  * decompressed pages (in the inode address space).
@@ -124,16 +181,21 @@ static void end_compressed_bio_read(struct bio *bio, int err)
        if (!atomic_dec_and_test(&cb->pending_bios))
                goto out;
 
+       inode = cb->inode;
+       ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
+       if (ret)
+               goto csum_failed;
+
        /* ok, we're the last bio for this extent, lets start
         * the decompression.
         */
-       inode = cb->inode;
        tree = &BTRFS_I(inode)->io_tree;
        ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
                                        cb->start,
                                        cb->orig_bio->bi_io_vec,
                                        cb->orig_bio->bi_vcnt,
                                        cb->compressed_len);
+csum_failed:
        if (ret)
                cb->errors = 1;
 
@@ -148,8 +210,21 @@ static void end_compressed_bio_read(struct bio *bio, int err)
        /* do io completion on the original bio */
        if (cb->errors) {
                bio_io_error(cb->orig_bio);
-       } else
+       } else {
+               int bio_index = 0;
+               struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
+
+               /*
+                * we have verified the checksum already, set page
+                * checked so the end_io handlers know about it
+                */
+               while(bio_index < cb->orig_bio->bi_vcnt) {
+                       SetPageChecked(bvec->bv_page);
+                       bvec++;
+                       bio_index++;
+               }
                bio_endio(cb->orig_bio, 0);
+       }
 
        /* finally free the cb struct */
        kfree(cb->compressed_pages);
@@ -277,12 +352,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
        int ret;
 
        WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
-       cb = kmalloc(sizeof(*cb), GFP_NOFS);
+       cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
        atomic_set(&cb->pending_bios, 0);
        cb->errors = 0;
        cb->inode = inode;
        cb->start = start;
        cb->len = len;
+       cb->mirror_num = 0;
        cb->compressed_pages = compressed_pages;
        cb->compressed_len = compressed_len;
        cb->orig_bio = NULL;
@@ -290,9 +366,6 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
        bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-       ret = btrfs_csum_file_bytes(root, inode, start, len);
-       BUG_ON(ret);
-
        bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
        bio->bi_private = cb;
        bio->bi_end_io = end_compressed_bio_write;
@@ -325,6 +398,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
                        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
                        BUG_ON(ret);
 
+                       ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+                       BUG_ON(ret);
+
                        ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
                        BUG_ON(ret);
 
@@ -348,6 +424,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
        BUG_ON(ret);
 
+       ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+       BUG_ON(ret);
+
        ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
        BUG_ON(ret);
 
@@ -510,6 +589,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        u64 em_start;
        struct extent_map *em;
        int ret;
+       u32 *sums;
 
        tree = &BTRFS_I(inode)->io_tree;
        em_tree = &BTRFS_I(inode)->extent_tree;
@@ -521,15 +601,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                   PAGE_CACHE_SIZE);
        spin_unlock(&em_tree->lock);
 
-       cb = kmalloc(sizeof(*cb), GFP_NOFS);
+       compressed_len = em->block_len;
+       cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
        atomic_set(&cb->pending_bios, 0);
        cb->errors = 0;
        cb->inode = inode;
+       cb->mirror_num = mirror_num;
+       sums = &cb->sums;
 
        cb->start = em->orig_start;
-       compressed_len = em->block_len;
        em_len = em->len;
        em_start = em->start;
+
        free_extent_map(em);
        em = NULL;
 
@@ -551,11 +634,6 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
        add_ra_bio_pages(inode, em_start + em_len, cb);
 
-       if (!btrfs_test_opt(root, NODATASUM) &&
-           !btrfs_test_flag(inode, NODATASUM)) {
-               btrfs_lookup_bio_sums(root, inode, cb->orig_bio);
-       }
-
        /* include any pages we added in add_ra-bio_pages */
        uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
        cb->len = uncompressed_len;
@@ -568,6 +646,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        for (page_index = 0; page_index < nr_pages; page_index++) {
                page = cb->compressed_pages[page_index];
                page->mapping = inode->i_mapping;
+               page->index = em_start >> PAGE_CACHE_SHIFT;
+
                if (comp_bio->bi_size)
                        ret = tree->ops->merge_bio_hook(page, 0,
                                                        PAGE_CACHE_SIZE,
@@ -591,7 +671,16 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                         */
                        atomic_inc(&cb->pending_bios);
 
-                       ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
+                       if (!btrfs_test_opt(root, NODATASUM) &&
+                           !btrfs_test_flag(inode, NODATASUM)) {
+                               btrfs_lookup_bio_sums(root, inode, comp_bio,
+                                                     sums);
+                       }
+                       sums += (comp_bio->bi_size + root->sectorsize - 1) /
+                               root->sectorsize;
+
+                       ret = btrfs_map_bio(root, READ, comp_bio,
+                                           mirror_num, 0);
                        BUG_ON(ret);
 
                        bio_put(comp_bio);
@@ -610,7 +699,12 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
        BUG_ON(ret);
 
-       ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
+       if (!btrfs_test_opt(root, NODATASUM) &&
+           !btrfs_test_flag(inode, NODATASUM)) {
+               btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+       }
+
+       ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
        BUG_ON(ret);
 
        bio_put(comp_bio);
index 96f2ec7..242b961 100644 (file)
@@ -73,6 +73,9 @@ struct btrfs_ordered_sum;
 /* directory objectid inside the root tree */
 #define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
 
+/* holds checksums of all the data extents */
+#define BTRFS_CSUM_TREE_OBJECTID 7ULL
+
 /* orhpan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
 
@@ -84,6 +87,13 @@ struct btrfs_ordered_sum;
 #define BTRFS_TREE_RELOC_OBJECTID -8ULL
 #define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
 
+/*
+ * extent checksums all have this objectid
+ * this allows them to share the logging tree
+ * for fsyncs
+ */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
 
@@ -634,6 +644,7 @@ struct btrfs_fs_info {
        struct btrfs_root *chunk_root;
        struct btrfs_root *dev_root;
        struct btrfs_root *fs_root;
+       struct btrfs_root *csum_root;
 
        /* the log root tree is a directory of all the other log roots */
        struct btrfs_root *log_root_tree;
@@ -716,6 +727,7 @@ struct btrfs_fs_info {
        struct btrfs_workers workers;
        struct btrfs_workers delalloc_workers;
        struct btrfs_workers endio_workers;
+       struct btrfs_workers endio_meta_workers;
        struct btrfs_workers endio_write_workers;
        struct btrfs_workers submit_workers;
        /*
@@ -858,13 +870,12 @@ struct btrfs_root {
  * extent data is for file data
  */
 #define BTRFS_EXTENT_DATA_KEY  108
+
 /*
- * csum items have the checksums for data in the extents
+ * extent csums are stored in a separate tree and hold csums for
+ * an entire extent on disk.
  */
-#define BTRFS_CSUM_ITEM_KEY    120
-
-
-/* reserve 21-31 for other file/dir stuff */
+#define BTRFS_EXTENT_CSUM_KEY  128
 
 /*
  * root items point to tree roots.  There are typically in the root
@@ -1917,7 +1928,7 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
 
 /* file-item.c */
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
-                         struct bio *bio);
+                         struct bio *bio, u32 *dst);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 objectid, u64 pos,
@@ -1929,17 +1940,16 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_path *path, u64 objectid,
                             u64 bytenr, int mod);
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, struct inode *inode,
+                          struct btrfs_root *root,
                           struct btrfs_ordered_sum *sums);
 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
-                      struct bio *bio);
+                      struct bio *bio, u64 file_start, int contig);
 int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
                          u64 start, unsigned long len);
 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
-                                         u64 objectid, u64 offset,
-                                         int cow);
+                                         u64 bytenr, int cow);
 int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root, struct btrfs_path *path,
                        u64 isize);
index 3eb7c25..61dc3b2 100644 (file)
@@ -445,11 +445,18 @@ static void end_workqueue_bio(struct bio *bio, int err)
        end_io_wq->error = err;
        end_io_wq->work.func = end_workqueue_fn;
        end_io_wq->work.flags = 0;
-       if (bio->bi_rw & (1 << BIO_RW))
+
+       if (bio->bi_rw & (1 << BIO_RW)) {
                btrfs_queue_worker(&fs_info->endio_write_workers,
                                   &end_io_wq->work);
-       else
-               btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
+       } else {
+               if (end_io_wq->metadata)
+                       btrfs_queue_worker(&fs_info->endio_meta_workers,
+                                          &end_io_wq->work);
+               else
+                       btrfs_queue_worker(&fs_info->endio_workers,
+                                          &end_io_wq->work);
+       }
 }
 
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
@@ -1208,6 +1215,9 @@ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
        info = (struct btrfs_fs_info *)bdi->unplug_io_data;
        list_for_each(cur, &info->fs_devices->devices) {
                device = list_entry(cur, struct btrfs_device, dev_list);
+               if (!device->bdev)
+                       continue;
+
                bdi = blk_get_backing_dev_info(device->bdev);
                if (bdi->unplug_io_fn) {
                        bdi->unplug_io_fn(bdi, page);
@@ -1344,7 +1354,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
         * blocksize <= pagesize, it is basically a noop
         */
        if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
-               btrfs_queue_worker(&fs_info->endio_workers,
+               btrfs_queue_worker(&fs_info->endio_meta_workers,
                                   &end_io_wq->work);
                return;
        }
@@ -1454,6 +1464,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        struct buffer_head *bh;
        struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
                                                 GFP_NOFS);
+       struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
+                                                GFP_NOFS);
        struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
                                               GFP_NOFS);
        struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
@@ -1470,7 +1482,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        struct btrfs_super_block *disk_super;
 
        if (!extent_root || !tree_root || !fs_info ||
-           !chunk_root || !dev_root) {
+           !chunk_root || !dev_root || !csum_root) {
                err = -ENOMEM;
                goto fail;
        }
@@ -1487,6 +1499,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        init_completion(&fs_info->kobj_unregister);
        fs_info->tree_root = tree_root;
        fs_info->extent_root = extent_root;
+       fs_info->csum_root = csum_root;
        fs_info->chunk_root = chunk_root;
        fs_info->dev_root = dev_root;
        fs_info->fs_devices = fs_devices;
@@ -1652,6 +1665,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
        btrfs_init_workers(&fs_info->endio_workers, "endio",
                           fs_info->thread_pool_size);
+       btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
+                          fs_info->thread_pool_size);
        btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
                           fs_info->thread_pool_size);
 
@@ -1667,6 +1682,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_start_workers(&fs_info->delalloc_workers, 1);
        btrfs_start_workers(&fs_info->fixup_workers, 1);
        btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+       btrfs_start_workers(&fs_info->endio_meta_workers,
+                           fs_info->thread_pool_size);
        btrfs_start_workers(&fs_info->endio_write_workers,
                            fs_info->thread_pool_size);
 
@@ -1751,6 +1768,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        if (ret)
                goto fail_extent_root;
 
+       ret = find_and_setup_root(tree_root, fs_info,
+                                 BTRFS_CSUM_TREE_OBJECTID, csum_root);
+       if (ret)
+               goto fail_extent_root;
+
+       csum_root->track_dirty = 1;
+
        btrfs_read_block_groups(extent_root);
 
        fs_info->generation = generation + 1;
@@ -1761,7 +1785,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                               "btrfs-cleaner");
        if (!fs_info->cleaner_kthread)
-               goto fail_extent_root;
+               goto fail_csum_root;
 
        fs_info->transaction_kthread = kthread_run(transaction_kthread,
                                                   tree_root,
@@ -1825,6 +1849,8 @@ fail_cleaner:
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
+fail_csum_root:
+       free_extent_buffer(csum_root->node);
 fail_extent_root:
        free_extent_buffer(extent_root->node);
 fail_tree_root:
@@ -1838,6 +1864,7 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->delalloc_workers);
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
+       btrfs_stop_workers(&fs_info->endio_meta_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
 fail_iput:
@@ -1853,6 +1880,7 @@ fail:
        kfree(fs_info);
        kfree(chunk_root);
        kfree(dev_root);
+       kfree(csum_root);
        return ERR_PTR(err);
 }
 
@@ -2131,6 +2159,9 @@ int close_ctree(struct btrfs_root *root)
        if (root->fs_info->dev_root->node);
                free_extent_buffer(root->fs_info->dev_root->node);
 
+       if (root->fs_info->csum_root->node);
+               free_extent_buffer(root->fs_info->csum_root->node);
+
        btrfs_free_block_groups(root->fs_info);
 
        del_fs_roots(fs_info);
@@ -2141,6 +2172,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_stop_workers(&fs_info->delalloc_workers);
        btrfs_stop_workers(&fs_info->workers);
        btrfs_stop_workers(&fs_info->endio_workers);
+       btrfs_stop_workers(&fs_info->endio_meta_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
 
@@ -2163,6 +2195,7 @@ int close_ctree(struct btrfs_root *root)
        kfree(fs_info->tree_root);
        kfree(fs_info->chunk_root);
        kfree(fs_info->dev_root);
+       kfree(fs_info->csum_root);
        return 0;
 }
 
index c3dfe2a..7449ecf 100644 (file)
@@ -1732,6 +1732,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        int whole_page;
        int ret;
 
+       if (err)
+               uptodate = 0;
+
        do {
                struct page *page = bvec->bv_page;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
@@ -1761,6 +1764,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                        if (ret == 0) {
                                uptodate =
                                        test_bit(BIO_UPTODATE, &bio->bi_flags);
+                               if (err)
+                                       uptodate = 0;
                                continue;
                        }
                }
index 234ed44..a3ad2ce 100644 (file)
@@ -74,8 +74,7 @@ out:
 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
-                                         u64 objectid, u64 offset,
-                                         int cow)
+                                         u64 bytenr, int cow)
 {
        int ret;
        struct btrfs_key file_key;
@@ -87,9 +86,9 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
                btrfs_super_csum_size(&root->fs_info->super_copy);
        int csums_in_item;
 
-       file_key.objectid = objectid;
-       file_key.offset = offset;
-       btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+       file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+       file_key.offset = bytenr;
+       btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
        ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
        if (ret < 0)
                goto fail;
@@ -100,11 +99,10 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
                        goto fail;
                path->slots[0]--;
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
-                   found_key.objectid != objectid) {
+               if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY)
                        goto fail;
-               }
-               csum_offset = (offset - found_key.offset) >>
+
+               csum_offset = (bytenr - found_key.offset) >>
                                root->fs_info->sb->s_blocksize_bits;
                csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
                csums_in_item /= csum_size;
@@ -143,7 +141,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
-                         struct bio *bio)
+                         struct bio *bio, u32 *dst)
 {
        u32 sum;
        struct bio_vec *bvec = bio->bi_io_vec;
@@ -151,6 +149,7 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
        u64 offset;
        u64 item_start_offset = 0;
        u64 item_last_offset = 0;
+       u64 disk_bytenr;
        u32 diff;
        u16 csum_size =
                btrfs_super_csum_size(&root->fs_info->super_copy);
@@ -165,21 +164,22 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 
        WARN_ON(bio->bi_vcnt <= 0);
 
+       disk_bytenr = (u64)bio->bi_sector << 9;
        while(bio_index < bio->bi_vcnt) {
                offset = page_offset(bvec->bv_page) + bvec->bv_offset;
-               ret = btrfs_find_ordered_sum(inode, offset, &sum);
+               ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
                if (ret == 0)
                        goto found;
 
-               if (!item || offset < item_start_offset ||
-                   offset >= item_last_offset) {
+               if (!item || disk_bytenr < item_start_offset ||
+                   disk_bytenr >= item_last_offset) {
                        struct btrfs_key found_key;
                        u32 item_size;
 
                        if (item)
                                btrfs_release_path(root, path);
-                       item = btrfs_lookup_csum(NULL, root, path,
-                                                inode->i_ino, offset, 0);
+                       item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
+                                                path, disk_bytenr, 0);
                        if (IS_ERR(item)) {
                                ret = PTR_ERR(item);
                                if (ret == -ENOENT || ret == -EFBIG)
@@ -208,7 +208,7 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
                 * this byte range must be able to fit inside
                 * a single leaf so it will also fit inside a u32
                 */
-               diff = offset - item_start_offset;
+               diff = disk_bytenr - item_start_offset;
                diff = diff / root->sectorsize;
                diff = diff * csum_size;
 
@@ -216,7 +216,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
                                   ((unsigned long)item) + diff,
                                   csum_size);
 found:
-               set_state_private(io_tree, offset, sum);
+               if (dst)
+                       *dst++ = sum;
+               else
+                       set_state_private(io_tree, offset, sum);
+               disk_bytenr += bvec->bv_len;
                bio_index++;
                bvec++;
        }
@@ -224,75 +228,8 @@ found:
        return 0;
 }
 
-int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
-                         u64 start, unsigned long len)
-{
-       struct btrfs_ordered_sum *sums;
-       struct btrfs_sector_sum *sector_sum;
-       struct btrfs_ordered_extent *ordered;
-       char *data;
-       struct page *page;
-       unsigned long total_bytes = 0;
-       unsigned long this_sum_bytes = 0;
-
-       sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
-       if (!sums)
-               return -ENOMEM;
-
-       sector_sum = sums->sums;
-       sums->file_offset = start;
-       sums->len = len;
-       INIT_LIST_HEAD(&sums->list);
-       ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
-       BUG_ON(!ordered);
-
-       while(len > 0) {
-               if (start >= ordered->file_offset + ordered->len ||
-                   start < ordered->file_offset) {
-                       sums->len = this_sum_bytes;
-                       this_sum_bytes = 0;
-                       btrfs_add_ordered_sum(inode, ordered, sums);
-                       btrfs_put_ordered_extent(ordered);
-
-                       sums = kzalloc(btrfs_ordered_sum_size(root, len),
-                                      GFP_NOFS);
-                       BUG_ON(!sums);
-                       sector_sum = sums->sums;
-                       sums->len = len;
-                       sums->file_offset = start;
-                       ordered = btrfs_lookup_ordered_extent(inode,
-                                                     sums->file_offset);
-                       BUG_ON(!ordered);
-               }
-
-               page = find_get_page(inode->i_mapping,
-                                    start >> PAGE_CACHE_SHIFT);
-
-               data = kmap_atomic(page, KM_USER0);
-               sector_sum->sum = ~(u32)0;
-               sector_sum->sum = btrfs_csum_data(root, data, sector_sum->sum,
-                                                 PAGE_CACHE_SIZE);
-               kunmap_atomic(data, KM_USER0);
-               btrfs_csum_final(sector_sum->sum,
-                                (char *)&sector_sum->sum);
-               sector_sum->offset = page_offset(page);
-               page_cache_release(page);
-
-               sector_sum++;
-               total_bytes += PAGE_CACHE_SIZE;
-               this_sum_bytes += PAGE_CACHE_SIZE;
-               start += PAGE_CACHE_SIZE;
-
-               WARN_ON(len < PAGE_CACHE_SIZE);
-               len -= PAGE_CACHE_SIZE;
-       }
-       btrfs_add_ordered_sum(inode, ordered, sums);
-       btrfs_put_ordered_extent(ordered);
-       return 0;
-}
-
 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
-                      struct bio *bio)
+                      struct bio *bio, u64 file_start, int contig)
 {
        struct btrfs_ordered_sum *sums;
        struct btrfs_sector_sum *sector_sum;
@@ -303,6 +240,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
        unsigned long total_bytes = 0;
        unsigned long this_sum_bytes = 0;
        u64 offset;
+       u64 disk_bytenr;
 
        WARN_ON(bio->bi_vcnt <= 0);
        sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
@@ -310,16 +248,25 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
                return -ENOMEM;
 
        sector_sum = sums->sums;
-       sums->file_offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+       disk_bytenr = (u64)bio->bi_sector << 9;
        sums->len = bio->bi_size;
        INIT_LIST_HEAD(&sums->list);
-       ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
+
+       if (contig)
+               offset = file_start;
+       else
+               offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+
+       ordered = btrfs_lookup_ordered_extent(inode, offset);
        BUG_ON(!ordered);
+       sums->bytenr = ordered->start;
 
        while(bio_index < bio->bi_vcnt) {
-               offset = page_offset(bvec->bv_page) + bvec->bv_offset;
-               if (offset >= ordered->file_offset + ordered->len ||
-                   offset < ordered->file_offset) {
+               if (!contig)
+                       offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+
+               if (!contig && (offset >= ordered->file_offset + ordered->len ||
+                   offset < ordered->file_offset)) {
                        unsigned long bytes_left;
                        sums->len = this_sum_bytes;
                        this_sum_bytes = 0;
@@ -333,10 +280,9 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
                        BUG_ON(!sums);
                        sector_sum = sums->sums;
                        sums->len = bytes_left;
-                       sums->file_offset = offset;
-                       ordered = btrfs_lookup_ordered_extent(inode,
-                                                     sums->file_offset);
+                       ordered = btrfs_lookup_ordered_extent(inode, offset);
                        BUG_ON(!ordered);
+                       sums->bytenr = ordered->start;
                }
 
                data = kmap_atomic(bvec->bv_page, KM_USER0);
@@ -348,13 +294,14 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
                kunmap_atomic(data, KM_USER0);
                btrfs_csum_final(sector_sum->sum,
                                 (char *)&sector_sum->sum);
-               sector_sum->offset = page_offset(bvec->bv_page) +
-                       bvec->bv_offset;
+               sector_sum->bytenr = disk_bytenr;
 
                sector_sum++;
                bio_index++;
                total_bytes += bvec->bv_len;
                this_sum_bytes += bvec->bv_len;
+               disk_bytenr += bvec->bv_len;
+               offset += bvec->bv_len;
                bvec++;
        }
        this_sum_bytes = 0;
@@ -364,11 +311,10 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
 }
 
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, struct inode *inode,
+                          struct btrfs_root *root,
                           struct btrfs_ordered_sum *sums)
 {
-       u64 objectid = inode->i_ino;
-       u64 offset;
+       u64 bytenr;
        int ret;
        struct btrfs_key file_key;
        struct btrfs_key found_key;
@@ -396,13 +342,12 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 again:
        next_offset = (u64)-1;
        found_next = 0;
-       offset = sector_sum->offset;
-       file_key.objectid = objectid;
-       file_key.offset = offset;
-       btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+       file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+       file_key.offset = sector_sum->bytenr;
+       bytenr = sector_sum->bytenr;
+       btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
 
-       mutex_lock(&BTRFS_I(inode)->csum_mutex);
-       item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1);
+       item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1);
        if (!IS_ERR(item)) {
                leaf = path->nodes[0];
                ret = 0;
@@ -432,8 +377,8 @@ again:
                        slot = 0;
                }
                btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
-               if (found_key.objectid != objectid ||
-                   found_key.type != BTRFS_CSUM_ITEM_KEY) {
+               if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+                   found_key.type != BTRFS_EXTENT_CSUM_KEY) {
                        found_next = 1;
                        goto insert;
                }
@@ -460,10 +405,10 @@ again:
        path->slots[0]--;
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-       csum_offset = (offset - found_key.offset) >>
+       csum_offset = (bytenr - found_key.offset) >>
                        root->fs_info->sb->s_blocksize_bits;
-       if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
-           found_key.objectid != objectid ||
+       if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY ||
+           found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
            csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
                goto insert;
        }
@@ -482,8 +427,18 @@ insert:
        btrfs_release_path(root, path);
        csum_offset = 0;
        if (found_next) {
-               u64 tmp = min((u64)i_size_read(inode), next_offset);
-               tmp -= offset & ~((u64)root->sectorsize -1);
+               u64 tmp = total_bytes + root->sectorsize;
+               u64 next_sector = sector_sum->bytenr;
+               struct btrfs_sector_sum *next = sector_sum + 1;
+
+               while(tmp < sums->len) {
+                       if (next_sector + root->sectorsize != next->bytenr)
+                               break;
+                       tmp += root->sectorsize;
+                       next_sector = next->bytenr;
+                       next++;
+               }
+               tmp = min(tmp, next_offset - file_key.offset);
                tmp >>= root->fs_info->sb->s_blocksize_bits;
                tmp = max((u64)1, tmp);
                tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
@@ -510,7 +465,6 @@ found:
        item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
                                      btrfs_item_size_nr(leaf, path->slots[0]));
        eb_token = NULL;
-       mutex_unlock(&BTRFS_I(inode)->csum_mutex);
        cond_resched();
 next_sector:
 
@@ -541,9 +495,9 @@ next_sector:
        if (total_bytes < sums->len) {
                item = (struct btrfs_csum_item *)((char *)item +
                                                  csum_size);
-               if (item < item_end && offset + PAGE_CACHE_SIZE ==
-                   sector_sum->offset) {
-                           offset = sector_sum->offset;
+               if (item < item_end && bytenr + PAGE_CACHE_SIZE ==
+                   sector_sum->bytenr) {
+                       bytenr = sector_sum->bytenr;
                        goto next_sector;
                }
        }
@@ -562,7 +516,6 @@ out:
        return ret;
 
 fail_unlock:
-       mutex_unlock(&BTRFS_I(inode)->csum_mutex);
        goto out;
 }
 
index 09efc94..c03d847 100644 (file)
@@ -1221,7 +1221,7 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, struct bio *bio
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
 
-       ret = btrfs_csum_one_bio(root, inode, bio);
+       ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
        BUG_ON(ret);
        return 0;
 }
@@ -1259,12 +1259,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                btrfs_test_flag(inode, NODATASUM);
 
        if (!(rw & (1 << BIO_RW))) {
-
-               if (bio_flags & EXTENT_BIO_COMPRESSED)
+               if (bio_flags & EXTENT_BIO_COMPRESSED) {
                        return btrfs_submit_compressed_read(inode, bio,
                                                    mirror_num, bio_flags);
-               else if (!skip_sum)
-                       btrfs_lookup_bio_sums(root, inode, bio);
+               } else if (!skip_sum)
+                       btrfs_lookup_bio_sums(root, inode, bio, NULL);
                goto mapit;
        } else if (!skip_sum) {
                /* we're doing a write, do the async checksumming */
@@ -1292,8 +1291,8 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
        btrfs_set_trans_block_group(trans, inode);
        list_for_each(cur, list) {
                sum = list_entry(cur, struct btrfs_ordered_sum, list);
-               btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
-                                      inode, sum);
+               btrfs_csum_file_blocks(trans,
+                      BTRFS_I(inode)->root->fs_info->csum_root, sum);
        }
        return 0;
 }
@@ -1545,6 +1544,7 @@ struct io_failure_record {
        u64 start;
        u64 len;
        u64 logical;
+       unsigned long bio_flags;
        int last_mirror;
 };
 
@@ -1563,7 +1563,6 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
        int ret;
        int rw;
        u64 logical;
-       unsigned long bio_flags = 0;
 
        ret = get_state_private(failure_tree, start, &private);
        if (ret) {
@@ -1573,6 +1572,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
                failrec->start = start;
                failrec->len = end - start + 1;
                failrec->last_mirror = 0;
+               failrec->bio_flags = 0;
 
                spin_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, failrec->len);
@@ -1588,8 +1588,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
                }
                logical = start - em->start;
                logical = em->block_start + logical;
-               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
-                       bio_flags = EXTENT_BIO_COMPRESSED;
+               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+                       logical = em->block_start;
+                       failrec->bio_flags = EXTENT_BIO_COMPRESSED;
+               }
                failrec->logical = logical;
                free_extent_map(em);
                set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
@@ -1626,6 +1628,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
        bio->bi_sector = failrec->logical >> 9;
        bio->bi_bdev = failed_bio->bi_bdev;
        bio->bi_size = 0;
+
        bio_add_page(bio, page, failrec->len, start - page_offset(page));
        if (failed_bio->bi_rw & (1 << BIO_RW))
                rw = WRITE;
@@ -1634,7 +1637,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
 
        BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
                                                      failrec->last_mirror,
-                                                     bio_flags);
+                                                     failrec->bio_flags);
        return 0;
 }
 
@@ -1688,9 +1691,14 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        u32 csum = ~(u32)0;
        unsigned long flags;
 
+       if (PageChecked(page)) {
+               ClearPageChecked(page);
+               goto good;
+       }
        if (btrfs_test_opt(root, NODATASUM) ||
            btrfs_test_flag(inode, NODATASUM))
                return 0;
+
        if (state && state->start == start) {
                private = state->private;
                ret = 0;
@@ -1709,7 +1717,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        }
        kunmap_atomic(kaddr, KM_IRQ0);
        local_irq_restore(flags);
-
+good:
        /* if the io failure tree for this inode is non-empty,
         * check to see if we've recovered from a failed IO
         */
@@ -2243,6 +2251,7 @@ fail:
        return err;
 }
 
+#if 0
 /*
  * when truncating bytes in a file, it is possible to avoid reading
  * the leaves that contain only checksum items.  This can be the
@@ -2410,6 +2419,8 @@ out:
        return ret;
 }
 
+#endif
+
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
@@ -2459,9 +2470,6 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 
        btrfs_init_path(path);
 
-       ret = drop_csum_leaves(trans, root, path, inode, new_size);
-       BUG_ON(ret);
-
 search_again:
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0) {
@@ -2509,16 +2517,11 @@ search_again:
                        }
                        item_end--;
                }
-               if (found_type == BTRFS_CSUM_ITEM_KEY) {
-                       ret = btrfs_csum_truncate(trans, root, path,
-                                                 new_size);
-                       BUG_ON(ret);
-               }
                if (item_end < new_size) {
                        if (found_type == BTRFS_DIR_ITEM_KEY) {
                                found_type = BTRFS_INODE_ITEM_KEY;
                        } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
-                               found_type = BTRFS_CSUM_ITEM_KEY;
+                               found_type = BTRFS_EXTENT_DATA_KEY;
                        } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
                                found_type = BTRFS_XATTR_ITEM_KEY;
                        } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
index b4da53d..6228b69 100644 (file)
@@ -714,8 +714,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
        u64 len = olen;
        u64 bs = root->fs_info->sb->s_blocksize;
        u64 hint_byte;
-       u16 csum_size =
-               btrfs_super_csum_size(&root->fs_info->super_copy);
+
        /*
         * TODO:
         * - split compressed inline extents.  annoying: we need to
@@ -833,7 +832,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                slot = path->slots[0];
 
                btrfs_item_key_to_cpu(leaf, &key, slot);
-               if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
+               if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
                    key.objectid != src->i_ino)
                        break;
 
@@ -958,56 +957,6 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        btrfs_mark_buffer_dirty(leaf);
                }
 
-               if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
-                       u32 size;
-                       struct btrfs_key new_key;
-                       u64 coverslen;
-                       int coff, clen;
-
-                       size = btrfs_item_size_nr(leaf, slot);
-                       coverslen = (size / csum_size) <<
-                               root->fs_info->sb->s_blocksize_bits;
-                       printk("csums for %llu~%llu\n",
-                              key.offset, coverslen);
-                       if (key.offset + coverslen < off ||
-                           key.offset >= off+len)
-                               goto next;
-
-                       read_extent_buffer(leaf, buf,
-                                          btrfs_item_ptr_offset(leaf, slot),
-                                          size);
-                       btrfs_release_path(root, path);
-
-                       coff = 0;
-                       if (off > key.offset)
-                               coff = ((off - key.offset) >>
-                                       root->fs_info->sb->s_blocksize_bits) *
-                                       csum_size;
-                       clen = size - coff;
-                       if (key.offset + coverslen > off+len)
-                               clen -= ((key.offset+coverslen-off-len) >>
-                                        root->fs_info->sb->s_blocksize_bits) *
-                                       csum_size;
-                       printk(" will dup %d~%d of %d\n",
-                              coff, clen, size);
-
-                       memcpy(&new_key, &key, sizeof(new_key));
-                       new_key.objectid = inode->i_ino;
-                       new_key.offset = key.offset + destoff - off;
-
-                       ret = btrfs_insert_empty_item(trans, root, path,
-                                                     &new_key, clen);
-                       if (ret)
-                               goto out;
-
-                       leaf = path->nodes[0];
-                       slot = path->slots[0];
-                       write_extent_buffer(leaf, buf + coff,
-                                           btrfs_item_ptr_offset(leaf, slot),
-                                           clen);
-                       btrfs_mark_buffer_dirty(leaf);
-               }
-
        next:
                btrfs_release_path(root, path);
                key.offset++;
index 027ad6b..d9e2322 100644 (file)
@@ -610,7 +610,8 @@ out:
  * try to find a checksum.  This is used because we allow pages to
  * be reclaimed before their checksum is actually put into the btree
  */
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
+                          u32 *sum)
 {
        struct btrfs_ordered_sum *ordered_sum;
        struct btrfs_sector_sum *sector_sums;
@@ -629,11 +630,11 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
        mutex_lock(&tree->mutex);
        list_for_each_prev(cur, &ordered->list) {
                ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
-               if (offset >= ordered_sum->file_offset) {
+               if (disk_bytenr >= ordered_sum->bytenr) {
                        num_sectors = ordered_sum->len / sectorsize;
                        sector_sums = ordered_sum->sums;
                        for (i = 0; i < num_sectors; i++) {
-                               if (sector_sums[i].offset == offset) {
+                               if (sector_sums[i].bytenr == disk_bytenr) {
                                        *sum = sector_sums[i].sum;
                                        ret = 0;
                                        goto out;
index 260bf95..ab66d5e 100644 (file)
@@ -33,15 +33,17 @@ struct btrfs_ordered_inode_tree {
  * the ordered extent are on disk
  */
 struct btrfs_sector_sum {
-       u64 offset;
+       /* byte number on disk of the data this checksum covers */
+       u64 bytenr;
        u32 sum;
 };
 
 struct btrfs_ordered_sum {
-       u64 file_offset;
+       /* bytenr is the start of this extent on disk */
+       u64 bytenr;
+
        /*
         * this is the length in bytes covered by the sums array below.
-        * But, the sums array may not be contiguous in the file.
         */
        unsigned long len;
        struct list_head list;
@@ -147,7 +149,7 @@ struct btrfs_ordered_extent *
 btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
 int btrfs_ordered_update_i_size(struct inode *inode,
                                struct btrfs_ordered_extent *ordered);
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum);
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
 int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
                                       pgoff_t start, pgoff_t end);
 int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
index c766649..08469ec 100644 (file)
@@ -934,24 +934,17 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
        unsigned long file_bytes;
        struct btrfs_ordered_sum *sums;
        struct btrfs_sector_sum *sector_sum;
-       struct inode *inode;
        unsigned long ptr;
 
        file_bytes = (item_size / csum_size) * root->sectorsize;
-       inode = read_one_inode(root, key->objectid);
-       if (!inode) {
-               return -EIO;
-       }
-
        sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS);
        if (!sums) {
-               iput(inode);
                return -ENOMEM;
        }
 
        INIT_LIST_HEAD(&sums->list);
        sums->len = file_bytes;
-       sums->file_offset = key->offset;
+       sums->bytenr = key->offset;
 
        /*
         * copy all the sums into the ordered sum struct
@@ -960,7 +953,7 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
        cur_offset = key->offset;
        ptr = btrfs_item_ptr_offset(eb, slot);
        while(item_size > 0) {
-               sector_sum->offset = cur_offset;
+               sector_sum->bytenr = cur_offset;
                read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size);
                sector_sum++;
                item_size -= csum_size;
@@ -969,11 +962,9 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
        }
 
        /* let btrfs_csum_file_blocks add them into the file */
-       ret = btrfs_csum_file_blocks(trans, root, inode, sums);
+       ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums);
        BUG_ON(ret);
        kfree(sums);
-       iput(inode);
-
        return 0;
 }
 /*
@@ -1670,7 +1661,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
                        ret = replay_one_extent(wc->trans, root, path,
                                                eb, i, &key);
                        BUG_ON(ret);
-               } else if (key.type == BTRFS_CSUM_ITEM_KEY) {
+               } else if (key.type == BTRFS_EXTENT_CSUM_KEY) {
                        ret = replay_one_csum(wc->trans, root, path,
                                              eb, i, &key);
                        BUG_ON(ret);
@@ -2466,6 +2457,85 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+/*
+ * Gather the data checksums for the disk byte range
+ * [disk_bytenr, disk_bytenr + len) into a freshly allocated
+ * btrfs_ordered_sum and append it to 'list'.
+ *
+ * 'root' is the tree searched with btrfs_lookup_csum() (the caller passes
+ * the csum root).  One btrfs_sector_sum is filled in per root->sectorsize
+ * bytes.  If the lookup for a sector fails, a message is logged and a sum
+ * of zero is recorded for it; this is not treated as an error.
+ *
+ * 'trans' is not used here.
+ *
+ * Entries linked on 'list' are left for the caller to free.
+ *
+ * Returns 0 on success, or -ENOMEM if the path or the sums array cannot be
+ * allocated (in which case nothing is added to 'list').
+ */
+static noinline int copy_extent_csums(struct btrfs_trans_handle *trans,
+                                     struct list_head *list,
+                                     struct btrfs_root *root,
+                                     u64 disk_bytenr, u64 len)
+{
+       struct btrfs_ordered_sum *sums;
+       struct btrfs_sector_sum *sector_sum;
+       struct btrfs_path *path;
+       struct btrfs_csum_item *item = NULL;
+       u64 end = disk_bytenr + len;
+       u64 item_start_offset = 0;
+       u64 item_last_offset = 0;
+       u32 diff;
+       u32 sum;
+       u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
+
+       /* get both allocations out of the way before touching 'list' */
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
+       if (!sums) {
+               btrfs_free_path(path);
+               return -ENOMEM;
+       }
+
+       sector_sum = sums->sums;
+       sums->bytenr = disk_bytenr;
+       sums->len = len;
+       list_add_tail(&sums->list, list);
+
+       while (disk_bytenr < end) {
+               /*
+                * look up a new csum item whenever we have none cached or
+                * the current byte falls outside the cached item's range
+                */
+               if (!item || disk_bytenr < item_start_offset ||
+                   disk_bytenr >= item_last_offset) {
+                       struct btrfs_key found_key;
+                       u32 item_size;
+
+                       if (item)
+                               btrfs_release_path(root, path);
+                       item = btrfs_lookup_csum(NULL, root, path,
+                                                disk_bytenr, 0);
+                       if (IS_ERR(item)) {
+                               /*
+                                * best effort: log the miss and record a
+                                * zero sum for this sector
+                                */
+                               sum = 0;
+                               printk("log no csum found for byte %llu\n",
+                                      (unsigned long long)disk_bytenr);
+                               item = NULL;
+                               btrfs_release_path(root, path);
+                               goto found;
+                       }
+                       btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+                                             path->slots[0]);
+
+                       /* remember the disk byte range this item covers */
+                       item_start_offset = found_key.offset;
+                       item_size = btrfs_item_size_nr(path->nodes[0],
+                                                      path->slots[0]);
+                       item_last_offset = item_start_offset +
+                               (item_size / csum_size) *
+                               root->sectorsize;
+                       item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                             struct btrfs_csum_item);
+               }
+               /*
+                * this byte range must be able to fit inside
+                * a single leaf so it will also fit inside a u32
+                */
+               diff = disk_bytenr - item_start_offset;
+               diff = diff / root->sectorsize;
+               diff = diff * csum_size;
+
+               read_extent_buffer(path->nodes[0], &sum,
+                                  ((unsigned long)item) + diff,
+                                  csum_size);
+found:
+               sector_sum->bytenr = disk_bytenr;
+               sector_sum->sum = sum;
+               disk_bytenr += root->sectorsize;
+               sector_sum++;
+       }
+       btrfs_free_path(path);
+       return 0;
+}
+
 static noinline int copy_items(struct btrfs_trans_handle *trans,
                               struct btrfs_root *log,
                               struct btrfs_path *dst_path,
@@ -2481,6 +2551,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        u32 *ins_sizes;
        char *ins_data;
        int i;
+       struct list_head ordered_sums;
+
+       INIT_LIST_HEAD(&ordered_sums);
 
        ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
                           nr * sizeof(u32), GFP_NOFS);
@@ -2535,6 +2608,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                                                   extent);
                                u64 dl = btrfs_file_extent_disk_num_bytes(src,
                                                                      extent);
+                               u64 cs = btrfs_file_extent_offset(src, extent);
+                               u64 cl = btrfs_file_extent_num_bytes(src,
+                                                                    extent);;
                                /* ds == 0 is a hole */
                                if (ds != 0) {
                                        ret = btrfs_inc_extent_ref(trans, log,
@@ -2544,6 +2620,11 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                                   trans->transid,
                                                   ins_keys[i].objectid);
                                        BUG_ON(ret);
+                                       ret = copy_extent_csums(trans,
+                                               &ordered_sums,
+                                               log->fs_info->csum_root,
+                                               ds + cs, cl);
+                                       BUG_ON(ret);
                                }
                        }
                }
@@ -2553,6 +2634,20 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(dst_path->nodes[0]);
        btrfs_release_path(log, dst_path);
        kfree(ins_data);
+
+       /*
+        * we have to do this after the loop above to avoid changing the
+        * log tree while trying to change the log tree.
+        */
+       while(!list_empty(&ordered_sums)) {
+               struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
+                                                  struct btrfs_ordered_sum,
+                                                  list);
+               ret = btrfs_csum_file_blocks(trans, log, sums);
+               BUG_ON(ret);
+               list_del(&sums->list);
+               kfree(sums);
+       }
        return 0;
 }
 
index 6c523b3..2049d17 100644 (file)
@@ -2771,6 +2771,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
        device->work.func = pending_bios_fn;
        fs_devices->num_devices++;
        spin_lock_init(&device->io_lock);
+       INIT_LIST_HEAD(&device->dev_alloc_list);
        memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
        return device;
 }