Btrfs: leave btree locks spinning more often
Chris Mason [Fri, 13 Mar 2009 15:00:37 +0000 (11:00 -0400)]
btrfs_mark_buffer_dirty would set dirty bits in the extent_io tree
for the buffers it was dirtying.  This may require a kmalloc and it
was not atomic.  So, anyone who called btrfs_mark_buffer_dirty had to
set any btree locks they were holding to blocking first.

This commit changes dirty tracking for extent buffers to just use a flag
in the extent buffer.  Now that we have one and only one extent buffer
per page, this can be safely done without losing dirty bits along the way.

This also introduces a path->leave_spinning flag that callers of
btrfs_search_slot can set to indicate they will properly handle a
returned path whose locks are all spinning instead of blocking.

Many of the btree search callers now expect spinning paths,
resulting in better btree concurrency overall.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

14 files changed:
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/inode-item.c
fs/btrfs/inode.c
fs/btrfs/locking.c
fs/btrfs/tree-log.c

index 3764248..8686a3d 100644 (file)
@@ -1684,7 +1684,8 @@ done:
         * we don't really know what they plan on doing with the path
         * from here on, so for now just mark it as blocking
         */
-       btrfs_set_path_blocking(p);
+       if (!p->leave_spinning)
+               btrfs_set_path_blocking(p);
        return ret;
 }
 
@@ -3032,26 +3033,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
                return -EAGAIN;
        }
 
+       btrfs_set_path_blocking(path);
        ret = split_leaf(trans, root, &orig_key, path,
                         sizeof(struct btrfs_item), 1);
        path->keep_locks = 0;
        BUG_ON(ret);
 
+       btrfs_unlock_up_safe(path, 1);
+       leaf = path->nodes[0];
+       BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
+
+split:
        /*
         * make sure any changes to the path from split_leaf leave it
         * in a blocking state
         */
        btrfs_set_path_blocking(path);
 
-       leaf = path->nodes[0];
-       BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
-
-split:
        item = btrfs_item_nr(leaf, path->slots[0]);
        orig_offset = btrfs_item_offset(leaf, item);
        item_size = btrfs_item_size(leaf, item);
 
-
        buf = kmalloc(item_size, GFP_NOFS);
        read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
                            path->slots[0]), item_size);
@@ -3545,7 +3547,6 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
        }
 
        btrfs_set_header_nritems(leaf, nritems + nr);
-       btrfs_mark_buffer_dirty(leaf);
 
        ret = 0;
        if (slot == 0) {
@@ -3553,6 +3554,8 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
                btrfs_cpu_key_to_disk(&disk_key, cpu_key);
                ret = fixup_low_keys(trans, root, path, &disk_key, 1);
        }
+       btrfs_unlock_up_safe(path, 1);
+       btrfs_mark_buffer_dirty(leaf);
 
        if (btrfs_leaf_free_space(root, leaf) < 0) {
                btrfs_print_leaf(root, leaf);
@@ -3596,7 +3599,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
                               total_data, total_size, nr);
 
 out:
-       btrfs_unlock_up_safe(path, 1);
        return ret;
 }
 
@@ -3792,6 +3794,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        slot = path->slots[1];
                        extent_buffer_get(leaf);
 
+                       btrfs_set_path_blocking(path);
                        wret = push_leaf_left(trans, root, path, 1, 1);
                        if (wret < 0 && wret != -ENOSPC)
                                ret = wret;
index 08d9f8d..4ddce91 100644 (file)
@@ -401,15 +401,16 @@ struct btrfs_path {
        int locks[BTRFS_MAX_LEVEL];
        int reada;
        /* keep some upper locks as we walk down */
-       int keep_locks;
-       int skip_locking;
        int lowest_level;
 
        /*
         * set by btrfs_split_item, tells search_slot to keep all locks
         * and to force calls to keep space in the nodes
         */
-       int search_for_split;
+       unsigned int search_for_split:1;
+       unsigned int keep_locks:1;
+       unsigned int skip_locking:1;
+       unsigned int leave_spinning:1;
 };
 
 /*
@@ -779,6 +780,11 @@ struct btrfs_fs_info {
        atomic_t throttle_gen;
 
        u64 total_pinned;
+
+       /* protected by the delalloc lock, used to keep from writing
+        * metadata until there is a nice batch
+        */
+       u64 dirty_metadata_bytes;
        struct list_head dirty_cowonly_roots;
 
        struct btrfs_fs_devices *fs_devices;
index 926a0b2..1d70236 100644 (file)
@@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
        key.objectid = dir;
        btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
        key.offset = btrfs_name_hash(name, name_len);
+
        path = btrfs_alloc_path();
+       path->leave_spinning = 1;
+
        data_size = sizeof(*dir_item) + name_len;
        dir_item = insert_with_overflow(trans, root, path, &key, data_size,
                                        name, name_len);
index 1f1d89b..9244cd7 100644 (file)
@@ -668,14 +668,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
        struct extent_io_tree *tree;
+       struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+       struct extent_buffer *eb;
+       int was_dirty;
+
        tree = &BTRFS_I(page->mapping->host)->io_tree;
+       if (!(current->flags & PF_MEMALLOC)) {
+               return extent_write_full_page(tree, page,
+                                             btree_get_extent, wbc);
+       }
 
-       if (current->flags & PF_MEMALLOC) {
-               redirty_page_for_writepage(wbc, page);
-               unlock_page(page);
-               return 0;
+       redirty_page_for_writepage(wbc, page);
+       eb = btrfs_find_tree_block(root, page_offset(page),
+                                     PAGE_CACHE_SIZE);
+       WARN_ON(!eb);
+
+       was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+       if (!was_dirty) {
+               spin_lock(&root->fs_info->delalloc_lock);
+               root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
+               spin_unlock(&root->fs_info->delalloc_lock);
        }
-       return extent_write_full_page(tree, page, btree_get_extent, wbc);
+       free_extent_buffer(eb);
+
+       unlock_page(page);
+       return 0;
 }
 
 static int btree_writepages(struct address_space *mapping,
@@ -684,15 +701,15 @@ static int btree_writepages(struct address_space *mapping,
        struct extent_io_tree *tree;
        tree = &BTRFS_I(mapping->host)->io_tree;
        if (wbc->sync_mode == WB_SYNC_NONE) {
+               struct btrfs_root *root = BTRFS_I(mapping->host)->root;
                u64 num_dirty;
-               u64 start = 0;
                unsigned long thresh = 32 * 1024 * 1024;
 
                if (wbc->for_kupdate)
                        return 0;
 
-               num_dirty = count_range_bits(tree, &start, (u64)-1,
-                                            thresh, EXTENT_DIRTY);
+               /* this is a bit racy, but that's ok */
+               num_dirty = root->fs_info->dirty_metadata_bytes;
                if (num_dirty < thresh)
                        return 0;
        }
@@ -859,9 +876,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
            root->fs_info->running_transaction->transid) {
                btrfs_assert_tree_locked(buf);
 
-               /* ugh, clear_extent_buffer_dirty can be expensive */
-               btrfs_set_lock_blocking(buf);
+               if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
+                       spin_lock(&root->fs_info->delalloc_lock);
+                       if (root->fs_info->dirty_metadata_bytes >= buf->len)
+                               root->fs_info->dirty_metadata_bytes -= buf->len;
+                       else
+                               WARN_ON(1);
+                       spin_unlock(&root->fs_info->delalloc_lock);
+               }
 
+               /* ugh, clear_extent_buffer_dirty needs to lock the page */
+               btrfs_set_lock_blocking(buf);
                clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
                                          buf);
        }
@@ -2348,8 +2373,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
        u64 transid = btrfs_header_generation(buf);
        struct inode *btree_inode = root->fs_info->btree_inode;
-
-       btrfs_set_lock_blocking(buf);
+       int was_dirty;
 
        btrfs_assert_tree_locked(buf);
        if (transid != root->fs_info->generation) {
@@ -2360,7 +2384,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
                        (unsigned long long)root->fs_info->generation);
                WARN_ON(1);
        }
-       set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
+       was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
+                                           buf);
+       if (!was_dirty) {
+               spin_lock(&root->fs_info->delalloc_lock);
+               root->fs_info->dirty_metadata_bytes += buf->len;
+               spin_unlock(&root->fs_info->delalloc_lock);
+       }
 }
 
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
@@ -2400,6 +2430,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 int btree_lock_page_hook(struct page *page)
 {
        struct inode *inode = page->mapping->host;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_buffer *eb;
        unsigned long len;
@@ -2415,6 +2446,16 @@ int btree_lock_page_hook(struct page *page)
 
        btrfs_tree_lock(eb);
        btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+
+       if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+               spin_lock(&root->fs_info->delalloc_lock);
+               if (root->fs_info->dirty_metadata_bytes >= eb->len)
+                       root->fs_info->dirty_metadata_bytes -= eb->len;
+               else
+                       WARN_ON(1);
+               spin_unlock(&root->fs_info->delalloc_lock);
+       }
+
        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);
 out:
index 95029db..c958ecb 100644 (file)
@@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
+void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
 int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
 int wait_on_tree_block_writeback(struct btrfs_root *root,
index a421c32..8933d15 100644 (file)
@@ -56,9 +56,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
                                         int ref_mod);
 static int update_reserved_extents(struct btrfs_root *root,
                                   u64 bytenr, u64 num, int reserve);
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root,
-                         u64 bytenr, u64 num_bytes, int is_data);
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc,
@@ -618,6 +615,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
        } else {
                goto out;
        }
+       btrfs_unlock_up_safe(path, 1);
        btrfs_mark_buffer_dirty(path->nodes[0]);
 out:
        btrfs_release_path(root, path);
@@ -760,6 +758,7 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        path->reada = 1;
+       path->leave_spinning = 1;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = num_bytes;
@@ -767,8 +766,10 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
        /* first find the extent item and update its reference count */
        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
                                path, 0, 1);
-       if (ret < 0)
+       if (ret < 0) {
+               btrfs_set_path_blocking(path);
                return ret;
+       }
 
        if (ret > 0) {
                WARN_ON(1);
@@ -791,11 +792,15 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
 
        refs = btrfs_extent_refs(l, item);
        btrfs_set_extent_refs(l, item, refs + refs_to_add);
+       btrfs_unlock_up_safe(path, 1);
+
        btrfs_mark_buffer_dirty(path->nodes[0]);
 
        btrfs_release_path(root->fs_info->extent_root, path);
 
        path->reada = 1;
+       path->leave_spinning = 1;
+
        /* now insert the actual backref */
        ret = insert_extent_backref(trans, root->fs_info->extent_root,
                                    path, bytenr, parent,
@@ -2050,6 +2055,8 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
                clear_extent_dirty(&fs_info->pinned_extents,
                                bytenr, bytenr + num - 1, GFP_NOFS);
        }
+       mutex_unlock(&root->fs_info->pinned_mutex);
+
        while (num > 0) {
                cache = btrfs_lookup_block_group(fs_info, bytenr);
                BUG_ON(!cache);
@@ -2141,8 +2148,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
        u64 end;
        int ret;
 
-       mutex_lock(&root->fs_info->pinned_mutex);
        while (1) {
+               mutex_lock(&root->fs_info->pinned_mutex);
                ret = find_first_extent_bit(unpin, 0, &start, &end,
                                            EXTENT_DIRTY);
                if (ret)
@@ -2150,14 +2157,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
                ret = btrfs_discard_extent(root, start, end + 1 - start);
 
+               /* unlocks the pinned mutex */
                btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
 
-               if (need_resched()) {
-                       mutex_unlock(&root->fs_info->pinned_mutex);
-                       cond_resched();
-                       mutex_lock(&root->fs_info->pinned_mutex);
-               }
+               cond_resched();
        }
        mutex_unlock(&root->fs_info->pinned_mutex);
        return ret;
@@ -2165,7 +2169,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
 static int pin_down_bytes(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
-                         u64 bytenr, u64 num_bytes, int is_data)
+                         struct btrfs_path *path,
+                         u64 bytenr, u64 num_bytes, int is_data,
+                         struct extent_buffer **must_clean)
 {
        int err = 0;
        struct extent_buffer *buf;
@@ -2191,15 +2197,16 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
                    header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
                    header_transid == trans->transid &&
                    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
-                       clean_tree_block(NULL, root, buf);
-                       btrfs_tree_unlock(buf);
-                       free_extent_buffer(buf);
+                       *must_clean = buf;
                        return 1;
                }
                btrfs_tree_unlock(buf);
        }
        free_extent_buffer(buf);
 pinit:
+       btrfs_set_path_blocking(path);
+       mutex_lock(&root->fs_info->pinned_mutex);
+       /* unlocks the pinned mutex */
        btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
 
        BUG_ON(err < 0);
@@ -2236,6 +2243,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        path->reada = 1;
+       path->leave_spinning = 1;
        ret = lookup_extent_backref(trans, extent_root, path,
                                    bytenr, parent, root_objectid,
                                    ref_generation, owner_objectid, 1);
@@ -2261,6 +2269,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                                                    refs_to_drop);
                        BUG_ON(ret);
                        btrfs_release_path(extent_root, path);
+                       path->leave_spinning = 1;
                        ret = btrfs_search_slot(trans, extent_root,
                                                &key, path, -1, 1);
                        if (ret) {
@@ -2318,6 +2327,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                /* if refs are 0, we need to setup the path for deletion */
                if (refs == 0) {
                        btrfs_release_path(extent_root, path);
+                       path->leave_spinning = 1;
                        ret = btrfs_search_slot(trans, extent_root, &key, path,
                                                -1, 1);
                        BUG_ON(ret);
@@ -2327,16 +2337,18 @@ static int __free_extent(struct btrfs_trans_handle *trans,
        if (refs == 0) {
                u64 super_used;
                u64 root_used;
+               struct extent_buffer *must_clean = NULL;
 
                if (pin) {
-                       mutex_lock(&root->fs_info->pinned_mutex);
-                       ret = pin_down_bytes(trans, root, bytenr, num_bytes,
-                               owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
-                       mutex_unlock(&root->fs_info->pinned_mutex);
+                       ret = pin_down_bytes(trans, root, path,
+                               bytenr, num_bytes,
+                               owner_objectid >= BTRFS_FIRST_FREE_OBJECTID,
+                               &must_clean);
                        if (ret > 0)
                                mark_free = 1;
                        BUG_ON(ret < 0);
                }
+
                /* block accounting for super block */
                spin_lock(&info->delalloc_lock);
                super_used = btrfs_super_bytes_used(&info->super_copy);
@@ -2348,11 +2360,27 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                btrfs_set_root_used(&root->root_item,
                                           root_used - num_bytes);
                spin_unlock(&info->delalloc_lock);
+
+               /*
+                * it is going to be very rare for someone to be waiting
+                * on the block we're freeing.  del_items might need to
+                * schedule, so rather than get fancy, just force it
+                * to blocking here
+                */
+               if (must_clean)
+                       btrfs_set_lock_blocking(must_clean);
+
                ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
                                      num_to_del);
                BUG_ON(ret);
                btrfs_release_path(extent_root, path);
 
+               if (must_clean) {
+                       clean_tree_block(NULL, root, must_clean);
+                       btrfs_tree_unlock(must_clean);
+                       free_extent_buffer(must_clean);
+               }
+
                if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
                        ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
                        BUG_ON(ret);
@@ -2480,8 +2508,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
        if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
            owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
                mutex_lock(&root->fs_info->pinned_mutex);
+
+               /* unlocks the pinned mutex */
                btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
-               mutex_unlock(&root->fs_info->pinned_mutex);
                update_reserved_extents(root, bytenr, num_bytes, 0);
                ret = 0;
        } else {
@@ -2931,6 +2960,7 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
                                       sizes, 2);
        BUG_ON(ret);
@@ -5435,6 +5465,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_inode(trans, root, path, objectid);
        if (ret)
                goto out;
index ebe6b29..08085af 100644 (file)
@@ -3124,20 +3124,15 @@ void free_extent_buffer(struct extent_buffer *eb)
 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
                              struct extent_buffer *eb)
 {
-       int set;
        unsigned long i;
        unsigned long num_pages;
        struct page *page;
 
-       u64 start = eb->start;
-       u64 end = start + eb->len - 1;
-
-       set = clear_extent_dirty(tree, start, end, GFP_NOFS);
        num_pages = num_extent_pages(eb->start, eb->len);
 
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
-               if (!set && !PageDirty(page))
+               if (!PageDirty(page))
                        continue;
 
                lock_page(page);
@@ -3146,22 +3141,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
                else
                        set_page_private(page, EXTENT_PAGE_PRIVATE);
 
-               /*
-                * if we're on the last page or the first page and the
-                * block isn't aligned on a page boundary, do extra checks
-                * to make sure we don't clean page that is partially dirty
-                */
-               if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
-                   ((i == num_pages - 1) &&
-                    ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
-                       start = (u64)page->index << PAGE_CACHE_SHIFT;
-                       end  = start + PAGE_CACHE_SIZE - 1;
-                       if (test_range_bit(tree, start, end,
-                                          EXTENT_DIRTY, 0)) {
-                               unlock_page(page);
-                               continue;
-                       }
-               }
                clear_page_dirty_for_io(page);
                spin_lock_irq(&page->mapping->tree_lock);
                if (!PageDirty(page)) {
@@ -3187,29 +3166,13 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
 {
        unsigned long i;
        unsigned long num_pages;
+       int was_dirty = 0;
 
+       was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
        num_pages = num_extent_pages(eb->start, eb->len);
-       for (i = 0; i < num_pages; i++) {
-               struct page *page = extent_buffer_page(eb, i);
-               /* writepage may need to do something special for the
-                * first page, we have to make sure page->private is
-                * properly set.  releasepage may drop page->private
-                * on us if the page isn't already dirty.
-                */
-               lock_page(page);
-               if (i == 0) {
-                       set_page_extent_head(page, eb->len);
-               } else if (PagePrivate(page) &&
-                          page->private != EXTENT_PAGE_PRIVATE) {
-                       set_page_extent_mapped(page);
-               }
+       for (i = 0; i < num_pages; i++)
                __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
-               set_extent_dirty(tree, page_offset(page),
-                                page_offset(page) + PAGE_CACHE_SIZE - 1,
-                                GFP_NOFS);
-               unlock_page(page);
-       }
-       return 0;
+       return was_dirty;
 }
 
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
@@ -3789,6 +3752,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
                ret = 0;
                goto out;
        }
+       if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+               ret = 0;
+               goto out;
+       }
        /* at this point we can safely release the extent buffer */
        num_pages = num_extent_pages(eb->start, eb->len);
        for (i = 0; i < num_pages; i++)
index 1f9df88..5bc20ab 100644 (file)
@@ -25,6 +25,7 @@
 /* these are bit numbers for test/set bit */
 #define EXTENT_BUFFER_UPTODATE 0
 #define EXTENT_BUFFER_BLOCKING 1
+#define EXTENT_BUFFER_DIRTY 2
 
 /*
  * page->private values.  Every page that is controlled by the extent
@@ -254,6 +255,8 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
                              struct extent_buffer *eb);
 int set_extent_buffer_dirty(struct extent_io_tree *tree,
                             struct extent_buffer *eb);
+int test_extent_buffer_dirty(struct extent_io_tree *tree,
+                            struct extent_buffer *eb);
 int set_extent_buffer_uptodate(struct extent_io_tree *tree,
                               struct extent_buffer *eb);
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
index 9646524..9b99886 100644 (file)
@@ -52,6 +52,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
        file_key.offset = pos;
        btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, root, path, &file_key,
                                      sizeof(*item));
        if (ret < 0)
@@ -523,6 +524,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
                key.offset = end_byte - 1;
                key.type = BTRFS_EXTENT_CSUM_KEY;
 
+               path->leave_spinning = 1;
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret > 0) {
                        if (path->slots[0] == 0)
@@ -757,8 +759,10 @@ insert:
        } else {
                ins_size = csum_size;
        }
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, root, path, &file_key,
                                      ins_size);
+       path->leave_spinning = 0;
        if (ret < 0)
                goto fail_unlock;
        if (ret != 0) {
@@ -776,7 +780,6 @@ found:
        item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
                                      btrfs_item_size_nr(leaf, path->slots[0]));
        eb_token = NULL;
-       cond_resched();
 next_sector:
 
        if (!eb_token ||
@@ -817,9 +820,9 @@ next_sector:
                eb_token = NULL;
        }
        btrfs_mark_buffer_dirty(path->nodes[0]);
-       cond_resched();
        if (total_bytes < sums->len) {
                btrfs_release_path(root, path);
+               cond_resched();
                goto again;
        }
 out:
index c800754..f06c275 100644 (file)
@@ -606,6 +606,7 @@ next_slot:
                        btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
 
                        btrfs_release_path(root, path);
+                       path->leave_spinning = 1;
                        ret = btrfs_insert_empty_item(trans, root, path, &ins,
                                                      sizeof(*extent));
                        BUG_ON(ret);
@@ -639,7 +640,9 @@ next_slot:
                                                        ram_bytes);
                        btrfs_set_file_extent_type(leaf, extent, found_type);
 
+                       btrfs_unlock_up_safe(path, 1);
                        btrfs_mark_buffer_dirty(path->nodes[0]);
+                       btrfs_set_lock_blocking(path->nodes[0]);
 
                        if (disk_bytenr != 0) {
                                ret = btrfs_update_extent_ref(trans, root,
@@ -652,6 +655,7 @@ next_slot:
 
                                BUG_ON(ret);
                        }
+                       path->leave_spinning = 0;
                        btrfs_release_path(root, path);
                        if (disk_bytenr != 0)
                                inode_add_bytes(inode, extent_end - end);
index 3d46fa1..6b627c6 100644 (file)
@@ -73,6 +73,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+       path->leave_spinning = 1;
+
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret > 0) {
                ret = -ENOENT;
@@ -127,6 +129,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      ins_len);
        if (ret == -EEXIST) {
index c427011..b83a45d 100644 (file)
@@ -134,6 +134,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+       path->leave_spinning = 1;
        btrfs_set_trans_block_group(trans, inode);
 
        key.objectid = inode->i_ino;
@@ -167,9 +168,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
                        cur_size = min_t(unsigned long, compressed_size,
                                       PAGE_CACHE_SIZE);
 
-                       kaddr = kmap(cpage);
+                       kaddr = kmap_atomic(cpage, KM_USER0);
                        write_extent_buffer(leaf, kaddr, ptr, cur_size);
-                       kunmap(cpage);
+                       kunmap_atomic(kaddr, KM_USER0);
 
                        i++;
                        ptr += cur_size;
@@ -1452,6 +1453,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
+       path->leave_spinning = 1;
        ret = btrfs_drop_extents(trans, root, inode, file_pos,
                                 file_pos + num_bytes, file_pos, &hint);
        BUG_ON(ret);
@@ -1474,6 +1476,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_set_file_extent_compression(leaf, fi, compression);
        btrfs_set_file_extent_encryption(leaf, fi, encryption);
        btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
+
+       btrfs_unlock_up_safe(path, 1);
+       btrfs_set_lock_blocking(leaf);
+
        btrfs_mark_buffer_dirty(leaf);
 
        inode_add_bytes(inode, num_bytes);
@@ -1486,8 +1492,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
                                          root->root_key.objectid,
                                          trans->transid, inode->i_ino, &ins);
        BUG_ON(ret);
-
        btrfs_free_path(path);
+
        return 0;
 }
 
@@ -2118,6 +2124,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
+       path->leave_spinning = 1;
        ret = btrfs_lookup_inode(trans, root, path,
                                 &BTRFS_I(inode)->location, 1);
        if (ret) {
@@ -2164,6 +2171,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
                goto err;
        }
 
+       path->leave_spinning = 1;
        di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
                                    name, name_len, -1);
        if (IS_ERR(di)) {
@@ -2515,6 +2523,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        key.type = (u8)-1;
 
 search_again:
+       path->leave_spinning = 1;
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0)
                goto error;
@@ -2661,6 +2670,7 @@ delete:
                        break;
                }
                if (found_extent) {
+                       btrfs_set_path_blocking(path);
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes,
                                                leaf->start, root_owner,
@@ -3466,6 +3476,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        sizes[0] = sizeof(struct btrfs_inode_item);
        sizes[1] = name_len + sizeof(*ref);
 
+       path->leave_spinning = 1;
        ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
        if (ret != 0)
                goto fail;
index 6d8db2f..a5310c0 100644 (file)
@@ -96,11 +96,12 @@ int btrfs_try_spin_lock(struct extent_buffer *eb)
 {
        int i;
 
-       spin_nested(eb);
-       if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-               return 1;
-       spin_unlock(&eb->lock);
-
+       if (btrfs_spin_on_block(eb)) {
+               spin_nested(eb);
+               if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+                       return 1;
+               spin_unlock(&eb->lock);
+       }
        /* spin for a bit on the BLOCKING flag */
        for (i = 0; i < 2; i++) {
                cpu_relax();
index 9c462fb..a93934f 100644 (file)
@@ -203,7 +203,6 @@ static int process_one_buffer(struct btrfs_root *log,
                mutex_lock(&log->fs_info->pinned_mutex);
                btrfs_update_pinned_extents(log->fs_info->extent_root,
                                            eb->start, eb->len, 1);
-               mutex_unlock(&log->fs_info->pinned_mutex);
        }
 
        if (btrfs_buffer_uptodate(eb, gen)) {