Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
author    Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 28 Jul 2009 21:27:06 +0000 (14:27 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 28 Jul 2009 21:27:06 +0000 (14:27 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (22 commits)
  Btrfs: Fix async caching interaction with unmount
  Btrfs: change how we unpin extents
  Btrfs: Correct redundant test in add_inode_ref
  Btrfs: find smallest available device extent during chunk allocation
  Btrfs: clear all space_info->full after removing a block group
  Btrfs: make flushoncommit mount option correctly wait on ordered_extents
  Btrfs: Avoid delayed reference update looping
  Btrfs: Fix ordering of key field checks in btrfs_previous_item
  Btrfs: find_free_dev_extent doesn't handle holes at the start of the device
  Btrfs: Remove code duplication in comp_keys
  Btrfs: async block group caching
  Btrfs: use hybrid extents+bitmap rb tree for free space
  Btrfs: Fix crash on read failures at mount
  Btrfs: remove redundant btrfs_header_level
  Btrfs: adjust NULL test
  Btrfs: Remove broken sanity check from btrfs_rmap_block()
  Btrfs: convert nested spin_lock_irqsave to spin_lock
  Btrfs: make sure all dirty blocks are written at commit time
  Btrfs: fix locking issue in btrfs_find_next_key
  Btrfs: fix double increment of path->slots[0] in btrfs_next_leaf
  ...

13 files changed:
fs/btrfs/async-thread.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/inode.c
fs/btrfs/print-tree.c
fs/btrfs/relocation.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 6e4f6c50a120dfcb78eb5f26dc4301936589cd0f..019e8af449abfeb1679c1a20c801d6b0082edf8b 100644
@@ -424,11 +424,11 @@ int btrfs_requeue_work(struct btrfs_work *work)
         * list
         */
        if (worker->idle) {
-               spin_lock_irqsave(&worker->workers->lock, flags);
+               spin_lock(&worker->workers->lock);
                worker->idle = 0;
                list_move_tail(&worker->worker_list,
                               &worker->workers->worker_list);
-               spin_unlock_irqrestore(&worker->workers->lock, flags);
+               spin_unlock(&worker->workers->lock);
        }
        if (!worker->working) {
                wake = 1;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 60a45f3a4e916fe9af5a8c644b3a8eb9b5b73f81..3fdcc0512d3ab62f95d42708ca0d6a049340b877 100644
@@ -557,19 +557,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
 
        btrfs_disk_key_to_cpu(&k1, disk);
 
-       if (k1.objectid > k2->objectid)
-               return 1;
-       if (k1.objectid < k2->objectid)
-               return -1;
-       if (k1.type > k2->type)
-               return 1;
-       if (k1.type < k2->type)
-               return -1;
-       if (k1.offset > k2->offset)
-               return 1;
-       if (k1.offset < k2->offset)
-               return -1;
-       return 0;
+       return btrfs_comp_cpu_keys(&k1, k2);
 }
 
 /*
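The twelve lines removed above collapse into one call: btrfs_comp_cpu_keys() performs the same objectid, then type, then offset three-way compare. A minimal userspace sketch of that ordering (struct and names simplified for illustration, not the kernel's exact definitions):

    #include <stdint.h>
    #include <stdio.h>

    struct cpu_key {                /* stand-in for struct btrfs_key */
            uint64_t objectid;
            uint8_t  type;
            uint64_t offset;
    };

    /* three-way compare: objectid first, then type, then offset */
    static int comp_cpu_keys(const struct cpu_key *k1, const struct cpu_key *k2)
    {
            if (k1->objectid != k2->objectid)
                    return k1->objectid > k2->objectid ? 1 : -1;
            if (k1->type != k2->type)
                    return k1->type > k2->type ? 1 : -1;
            if (k1->offset != k2->offset)
                    return k1->offset > k2->offset ? 1 : -1;
            return 0;
    }

    int main(void)
    {
            struct cpu_key a = { 256, 1, 0 }, b = { 256, 1, 4096 };
            printf("%d\n", comp_cpu_keys(&a, &b));  /* prints -1 */
            return 0;
    }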
@@ -1052,9 +1040,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
                return 0;
 
-       if (btrfs_header_nritems(mid) > 2)
-               return 0;
-
        if (btrfs_header_nritems(mid) < 2)
                err_on_enospc = 1;
 
@@ -1701,6 +1686,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        struct extent_buffer *b;
        int slot;
        int ret;
+       int err;
        int level;
        int lowest_unlock = 1;
        u8 lowest_level = 0;
@@ -1737,8 +1723,6 @@ again:
                        p->locks[level] = 1;
 
                if (cow) {
-                       int wret;
-
                        /*
                         * if we don't really need to cow this block
                         * then we don't want to set the path blocking,
@@ -1749,12 +1733,12 @@ again:
 
                        btrfs_set_path_blocking(p);
 
-                       wret = btrfs_cow_block(trans, root, b,
-                                              p->nodes[level + 1],
-                                              p->slots[level + 1], &b);
-                       if (wret) {
+                       err = btrfs_cow_block(trans, root, b,
+                                             p->nodes[level + 1],
+                                             p->slots[level + 1], &b);
+                       if (err) {
                                free_extent_buffer(b);
-                               ret = wret;
+                               ret = err;
                                goto done;
                        }
                }
@@ -1793,41 +1777,45 @@ cow_done:
                ret = bin_search(b, key, level, &slot);
 
                if (level != 0) {
-                       if (ret && slot > 0)
+                       int dec = 0;
+                       if (ret && slot > 0) {
+                               dec = 1;
                                slot -= 1;
+                       }
                        p->slots[level] = slot;
-                       ret = setup_nodes_for_search(trans, root, p, b, level,
+                       err = setup_nodes_for_search(trans, root, p, b, level,
                                                     ins_len);
-                       if (ret == -EAGAIN)
+                       if (err == -EAGAIN)
                                goto again;
-                       else if (ret)
+                       if (err) {
+                               ret = err;
                                goto done;
+                       }
                        b = p->nodes[level];
                        slot = p->slots[level];
 
                        unlock_up(p, level, lowest_unlock);
 
-                       /* this is only true while dropping a snapshot */
                        if (level == lowest_level) {
-                               ret = 0;
+                               if (dec)
+                                       p->slots[level]++;
                                goto done;
                        }
 
-                       ret = read_block_for_search(trans, root, p,
+                       err = read_block_for_search(trans, root, p,
                                                    &b, level, slot, key);
-                       if (ret == -EAGAIN)
+                       if (err == -EAGAIN)
                                goto again;
-
-                       if (ret == -EIO)
+                       if (err) {
+                               ret = err;
                                goto done;
+                       }
 
                        if (!p->skip_locking) {
-                               int lret;
-
                                btrfs_clear_path_blocking(p, NULL);
-                               lret = btrfs_try_spin_lock(b);
+                               err = btrfs_try_spin_lock(b);
 
-                               if (!lret) {
+                               if (!err) {
                                        btrfs_set_path_blocking(p);
                                        btrfs_tree_lock(b);
                                        btrfs_clear_path_blocking(p, b);
@@ -1837,16 +1825,14 @@ cow_done:
                        p->slots[level] = slot;
                        if (ins_len > 0 &&
                            btrfs_leaf_free_space(root, b) < ins_len) {
-                               int sret;
-
                                btrfs_set_path_blocking(p);
-                               sret = split_leaf(trans, root, key,
-                                                     p, ins_len, ret == 0);
+                               err = split_leaf(trans, root, key,
+                                                p, ins_len, ret == 0);
                                btrfs_clear_path_blocking(p, NULL);
 
-                               BUG_ON(sret > 0);
-                               if (sret) {
-                                       ret = sret;
+                               BUG_ON(err > 0);
+                               if (err) {
+                                       ret = err;
                                        goto done;
                                }
                        }
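This hunk completes a cleanup that runs through btrfs_search_slot(): ret is reserved for the value the function ultimately returns (the bin_search() result), while every intermediate failure goes through the new err variable and only overwrites ret on the error path. A compilable sketch of the idiom, with stub helpers standing in for bin_search() and friends:

    #include <stdio.h>

    /* stubs standing in for bin_search()/setup_nodes_for_search()/... */
    static int bin_search_stub(int key)  { return key ? 1 : 0; }
    static int setup_nodes_stub(void)    { return 0; /* or -ENOMEM... */ }

    /* 'ret' carries the search result; 'err' only intermediate failures */
    static int search(int key)
    {
            int ret = bin_search_stub(key); /* 0 = exact match, 1 = next slot */
            int err = setup_nodes_stub();
            if (err)
                    return err;             /* a failure overrides the result */
            return ret;                     /* otherwise report match status */
    }

    int main(void)
    {
            printf("%d\n", search(42));     /* prints 1: no exact match */
            return 0;
    }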
@@ -3807,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                }
 
                /* delete the leaf if it is mostly empty */
-               if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) {
+               if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
                        /* push_leaf_left fixes the path.
                         * make sure the path still points to our leaf
                         * for possible call to del_ptr below
@@ -4042,10 +4028,9 @@ out:
  * calling this function.
  */
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
-                       struct btrfs_key *key, int lowest_level,
+                       struct btrfs_key *key, int level,
                        int cache_only, u64 min_trans)
 {
-       int level = lowest_level;
        int slot;
        struct extent_buffer *c;
 
@@ -4058,11 +4043,40 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
                c = path->nodes[level];
 next:
                if (slot >= btrfs_header_nritems(c)) {
-                       level++;
-                       if (level == BTRFS_MAX_LEVEL)
+                       int ret;
+                       int orig_lowest;
+                       struct btrfs_key cur_key;
+                       if (level + 1 >= BTRFS_MAX_LEVEL ||
+                           !path->nodes[level + 1])
                                return 1;
-                       continue;
+
+                       if (path->locks[level + 1]) {
+                               level++;
+                               continue;
+                       }
+
+                       slot = btrfs_header_nritems(c) - 1;
+                       if (level == 0)
+                               btrfs_item_key_to_cpu(c, &cur_key, slot);
+                       else
+                               btrfs_node_key_to_cpu(c, &cur_key, slot);
+
+                       orig_lowest = path->lowest_level;
+                       btrfs_release_path(root, path);
+                       path->lowest_level = level;
+                       ret = btrfs_search_slot(NULL, root, &cur_key, path,
+                                               0, 0);
+                       path->lowest_level = orig_lowest;
+                       if (ret < 0)
+                               return ret;
+
+                       c = path->nodes[level];
+                       slot = path->slots[level];
+                       if (ret == 0)
+                               slot++;
+                       goto next;
                }
+
                if (level == 0)
                        btrfs_item_key_to_cpu(c, key, slot);
                else {
@@ -4146,7 +4160,8 @@ again:
         * advance the path if there are now more items available.
         */
        if (nritems > 0 && path->slots[0] < nritems - 1) {
-               path->slots[0]++;
+               if (ret == 0)
+                       path->slots[0]++;
                ret = 0;
                goto done;
        }
@@ -4278,10 +4293,10 @@ int btrfs_previous_item(struct btrfs_root *root,
                        path->slots[0]--;
 
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (found_key.type == type)
-                       return 0;
                if (found_key.objectid < min_objectid)
                        break;
+               if (found_key.type == type)
+                       return 0;
                if (found_key.objectid == min_objectid &&
                    found_key.type < type)
                        break;
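The swap above matters because a key of the wanted type can live under an objectid below min_objectid; the objectid bound has to be tested before the type match, or btrfs_previous_item() can return an item outside the caller's range. A standalone sketch of the corrected ordering (an array walk instead of a btree path, with invented sample keys):

    #include <stdint.h>
    #include <stdio.h>

    struct key { uint64_t objectid; uint8_t type; };

    /* scan backwards from 'slot'; return the index of an item of 'type'
     * with objectid >= min_objectid, or -1 if the bound is crossed first */
    static int previous_item(const struct key *keys, int slot,
                             uint64_t min_objectid, uint8_t type)
    {
            while (slot >= 0) {
                    const struct key *k = &keys[slot];
                    /* bound check first: a matching type under a smaller
                     * objectid must not be returned */
                    if (k->objectid < min_objectid)
                            break;
                    if (k->type == type)
                            return slot;
                    if (k->objectid == min_objectid && k->type < type)
                            break;
                    slot--;
            }
            return -1;
    }

    int main(void)
    {
            struct key keys[] = { { 255, 7 }, { 256, 3 }, { 256, 7 } };
            /* -1: keys[0] matches the type but violates min_objectid;
             * the old check order would have returned it */
            printf("%d\n", previous_item(keys, 0, 256, 7));
            printf("%d\n", previous_item(keys, 2, 256, 7));  /* 2 */
            return 0;
    }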
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 98a87383871722fbddc36590cae9e60a512ef53a..215ef8cae823ce53cf91e559ebfe7ed602eda214 100644
@@ -481,7 +481,7 @@ struct btrfs_shared_data_ref {
 
 struct btrfs_extent_inline_ref {
        u8 type;
-       u64 offset;
+       __le64 offset;
 } __attribute__ ((__packed__));
 
 /* old style backrefs item */
@@ -689,6 +689,7 @@ struct btrfs_space_info {
        struct list_head block_groups;
        spinlock_t lock;
        struct rw_semaphore groups_sem;
+       atomic_t caching_threads;
 };
 
 /*
@@ -707,6 +708,9 @@ struct btrfs_free_cluster {
        /* first extent starting offset */
        u64 window_start;
 
+       /* if this cluster simply points at a bitmap in the block group */
+       bool points_to_bitmap;
+
        struct btrfs_block_group_cache *block_group;
        /*
         * when a cluster is allocated from a block group, we put the
@@ -716,24 +720,37 @@ struct btrfs_free_cluster {
        struct list_head block_group_list;
 };
 
+enum btrfs_caching_type {
+       BTRFS_CACHE_NO          = 0,
+       BTRFS_CACHE_STARTED     = 1,
+       BTRFS_CACHE_FINISHED    = 2,
+};
+
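A block group's cached field now walks these states exactly once: BTRFS_CACHE_NO until someone kicks off caching, STARTED while the kthread scans, FINISHED when the free-space cache is complete. A userspace sketch of the done-check (the kernel's block_group_cache_done() issues smp_mb() before a plain read; C11 atomics stand in for that here):

    #include <stdatomic.h>
    #include <stdio.h>

    enum caching_type { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

    struct block_group { _Atomic int cached; };

    /* true once the caching thread has published CACHE_FINISHED */
    static int cache_done(struct block_group *bg)
    {
            return atomic_load(&bg->cached) == CACHE_FINISHED;
    }

    int main(void)
    {
            struct block_group bg = { CACHE_NO };
            atomic_store(&bg.cached, CACHE_STARTED);   /* cache_block_group() */
            printf("%d\n", cache_done(&bg));           /* 0: still caching */
            atomic_store(&bg.cached, CACHE_FINISHED);  /* caching kthread */
            printf("%d\n", cache_done(&bg));           /* 1 */
            return 0;
    }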
 struct btrfs_block_group_cache {
        struct btrfs_key key;
        struct btrfs_block_group_item item;
+       struct btrfs_fs_info *fs_info;
        spinlock_t lock;
-       struct mutex cache_mutex;
        u64 pinned;
        u64 reserved;
        u64 flags;
-       int cached;
+       u64 sectorsize;
+       int extents_thresh;
+       int free_extents;
+       int total_bitmaps;
        int ro;
        int dirty;
 
+       /* cache tracking stuff */
+       wait_queue_head_t caching_q;
+       int cached;
+
        struct btrfs_space_info *space_info;
 
        /* free space cache stuff */
        spinlock_t tree_lock;
-       struct rb_root free_space_bytes;
        struct rb_root free_space_offset;
+       u64 free_space;
 
        /* block group cache stuff */
        struct rb_node cache_node;
@@ -942,6 +959,9 @@ struct btrfs_root {
        /* the node lock is held while changing the node pointer */
        spinlock_t node_lock;
 
+       /* taken when updating the commit root */
+       struct rw_semaphore commit_root_sem;
+
        struct extent_buffer *commit_root;
        struct btrfs_root *log_root;
        struct btrfs_root *reloc_root;
@@ -1988,6 +2008,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
                                 u64 bytes);
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
                              u64 bytes);
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d28d29c95f7ca23a91e9ccf3c306840484ea29ed..7dcaa8138864b455ea0314bcf78d17fe820c2f5d 100644
@@ -909,6 +909,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        spin_lock_init(&root->inode_lock);
        mutex_init(&root->objectid_mutex);
        mutex_init(&root->log_mutex);
+       init_rwsem(&root->commit_root_sem);
        init_waitqueue_head(&root->log_writer_wait);
        init_waitqueue_head(&root->log_commit_wait[0]);
        init_waitqueue_head(&root->log_commit_wait[1]);
@@ -1799,6 +1800,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                                           btrfs_super_chunk_root(disk_super),
                                           blocksize, generation);
        BUG_ON(!chunk_root->node);
+       if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+               printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+                      sb->s_id);
+               goto fail_chunk_root;
+       }
        btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
        chunk_root->commit_root = btrfs_root_node(chunk_root);
 
@@ -1826,6 +1832,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                                          blocksize, generation);
        if (!tree_root->node)
                goto fail_chunk_root;
+       if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+               printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+                      sb->s_id);
+               goto fail_tree_root;
+       }
        btrfs_set_root_node(&tree_root->root_item, tree_root->node);
        tree_root->commit_root = btrfs_root_node(tree_root);
 
@@ -2322,6 +2333,9 @@ int close_ctree(struct btrfs_root *root)
                        printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
        }
 
+       fs_info->closing = 2;
+       smp_mb();
+
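Bumping fs_info->closing to 2 behind a memory barrier is how unmount signals the async caching threads, which re-read the flag on every loop iteration and bail out once it passes 1. A userspace analogue using a release store and acquire load:

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic int closing;     /* stand-in for fs_info->closing */

    /* caching loop body: give up once unmount reaches phase 2 */
    static int should_abort(void)
    {
            return atomic_load_explicit(&closing, memory_order_acquire) > 1;
    }

    int main(void)
    {
            printf("%d\n", should_abort());  /* 0: still mounted */
            /* close_ctree(): publish phase 2 so cachers stop scanning */
            atomic_store_explicit(&closing, 2, memory_order_release);
            printf("%d\n", should_abort());  /* 1 */
            return 0;
    }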
        if (fs_info->delalloc_bytes) {
                printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
                       (unsigned long long)fs_info->delalloc_bytes);
@@ -2343,6 +2357,7 @@ int close_ctree(struct btrfs_root *root)
        free_extent_buffer(root->fs_info->csum_root->commit_root);
 
        btrfs_free_block_groups(root->fs_info);
+       btrfs_free_pinned_extents(root->fs_info);
 
        del_fs_roots(fs_info);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a5aca3997d426da09c87e33642916284eca135b6..fadf69a2764b316828f82d31be8490c0dcf57739 100644
@@ -21,6 +21,7 @@
 #include <linux/blkdev.h>
 #include <linux/sort.h>
 #include <linux/rcupdate.h>
+#include <linux/kthread.h>
 #include "compat.h"
 #include "hash.h"
 #include "ctree.h"
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 alloc_bytes,
                          u64 flags, int force);
 
+static noinline int
+block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+       smp_mb();
+       return cache->cached == BTRFS_CACHE_FINISHED;
+}
+
 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 {
        return (cache->flags & bits) == bits;
@@ -145,21 +153,71 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
        return ret;
 }
 
+/*
+ * We always set EXTENT_LOCKED for the super mirror extents so we don't
+ * overwrite them, so those bits need to be unset.  Also, if we are unmounting
+ * with pinned extents still sitting there because a block group was still caching,
+ * we need to clear those now, since we are done.
+ */
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+{
+       u64 start, end, last = 0;
+       int ret;
+
+       while (1) {
+               ret = find_first_extent_bit(&info->pinned_extents, last,
+                                           &start, &end,
+                                           EXTENT_LOCKED|EXTENT_DIRTY);
+               if (ret)
+                       break;
+
+               clear_extent_bits(&info->pinned_extents, start, end,
+                                 EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
+               last = end+1;
+       }
+}
+
+static int remove_sb_from_cache(struct btrfs_root *root,
+                               struct btrfs_block_group_cache *cache)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       u64 bytenr;
+       u64 *logical;
+       int stripe_len;
+       int i, nr, ret;
+
+       for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+               bytenr = btrfs_sb_offset(i);
+               ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+                                      cache->key.objectid, bytenr,
+                                      0, &logical, &nr, &stripe_len);
+               BUG_ON(ret);
+               while (nr--) {
+                       try_lock_extent(&fs_info->pinned_extents,
+                                       logical[nr],
+                                       logical[nr] + stripe_len - 1, GFP_NOFS);
+               }
+               kfree(logical);
+       }
+
+       return 0;
+}
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
  * since their free space will be released as soon as the transaction commits.
  */
-static int add_new_free_space(struct btrfs_block_group_cache *block_group,
+static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
                              struct btrfs_fs_info *info, u64 start, u64 end)
 {
-       u64 extent_start, extent_end, size;
+       u64 extent_start, extent_end, size, total_added = 0;
        int ret;
 
        while (start < end) {
                ret = find_first_extent_bit(&info->pinned_extents, start,
                                            &extent_start, &extent_end,
-                                           EXTENT_DIRTY);
+                                           EXTENT_DIRTY|EXTENT_LOCKED);
                if (ret)
                        break;
 
@@ -167,6 +225,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
                        start = extent_end + 1;
                } else if (extent_start > start && extent_start < end) {
                        size = extent_start - start;
+                       total_added += size;
                        ret = btrfs_add_free_space(block_group, start,
                                                   size);
                        BUG_ON(ret);
@@ -178,84 +237,79 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
 
        if (start < end) {
                size = end - start;
+               total_added += size;
                ret = btrfs_add_free_space(block_group, start, size);
                BUG_ON(ret);
        }
 
-       return 0;
+       return total_added;
 }
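add_new_free_space() now reports how many bytes it added, which the caching thread accumulates so it can wake waiters after every couple of megabytes of progress. The underlying arithmetic just sums the gaps between pinned extents inside [start, end); a standalone sketch with invented sample ranges:

    #include <stdint.h>
    #include <stdio.h>

    struct range { uint64_t start, end; };  /* inclusive pinned extents */

    /* sum the free gaps in [start, end) around sorted pinned ranges */
    static uint64_t add_free_space(uint64_t start, uint64_t end,
                                   const struct range *pinned, int n)
    {
            uint64_t total = 0;
            for (int i = 0; i < n && start < end; i++) {
                    if (pinned[i].start >= end)
                            break;
                    if (pinned[i].start > start)
                            total += pinned[i].start - start; /* gap before */
                    if (pinned[i].end + 1 > start)
                            start = pinned[i].end + 1;        /* skip pinned */
            }
            if (start < end)
                    total += end - start;                     /* trailing gap */
            return total;
    }

    int main(void)
    {
            struct range pinned[] = { { 4096, 8191 }, { 16384, 20479 } };
            /* 4096 + 8192 + 12288 = 24576 free bytes in [0, 32768) */
            printf("%llu\n", (unsigned long long)
                   add_free_space(0, 32768, pinned, 2));
            return 0;
    }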
 
-static int remove_sb_from_cache(struct btrfs_root *root,
-                               struct btrfs_block_group_cache *cache)
-{
-       u64 bytenr;
-       u64 *logical;
-       int stripe_len;
-       int i, nr, ret;
-
-       for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
-               bytenr = btrfs_sb_offset(i);
-               ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
-                                      cache->key.objectid, bytenr, 0,
-                                      &logical, &nr, &stripe_len);
-               BUG_ON(ret);
-               while (nr--) {
-                       btrfs_remove_free_space(cache, logical[nr],
-                                               stripe_len);
-               }
-               kfree(logical);
-       }
-       return 0;
-}
-
-static int cache_block_group(struct btrfs_root *root,
-                            struct btrfs_block_group_cache *block_group)
+static int caching_kthread(void *data)
 {
+       struct btrfs_block_group_cache *block_group = data;
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       u64 last = 0;
        struct btrfs_path *path;
        int ret = 0;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        int slot;
-       u64 last;
-
-       if (!block_group)
-               return 0;
+       u64 total_found = 0;
 
-       root = root->fs_info->extent_root;
-
-       if (block_group->cached)
-               return 0;
+       BUG_ON(!fs_info);
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-       path->reada = 2;
+       atomic_inc(&block_group->space_info->caching_threads);
+       last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+again:
+       /* need to make sure the commit_root doesn't disappear */
+       down_read(&fs_info->extent_root->commit_root_sem);
+
        /*
-        * we get into deadlocks with paths held by callers of this function.
-        * since the alloc_mutex is protecting things right now, just
-        * skip the locking here
+        * We don't want to deadlock with somebody trying to allocate a new
+        * extent for the extent root while also trying to search the extent
+        * root to add free space.  So we skip locking and search the commit
+        * root, since it's read-only
         */
        path->skip_locking = 1;
-       last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+       path->search_commit_root = 1;
+       path->reada = 2;
+
        key.objectid = last;
        key.offset = 0;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
 
        while (1) {
+               smp_mb();
+               if (block_group->fs_info->closing > 1) {
+                       last = (u64)-1;
+                       break;
+               }
+
                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
-                       ret = btrfs_next_leaf(root, path);
+                       ret = btrfs_next_leaf(fs_info->extent_root, path);
                        if (ret < 0)
                                goto err;
-                       if (ret == 0)
-                               continue;
-                       else
+                       else if (ret)
                                break;
+
+                       if (need_resched()) {
+                               btrfs_release_path(fs_info->extent_root, path);
+                               up_read(&fs_info->extent_root->commit_root_sem);
+                               cond_resched();
+                               goto again;
+                       }
+
+                       continue;
                }
                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (key.objectid < block_group->key.objectid)
@@ -266,24 +320,59 @@ static int cache_block_group(struct btrfs_root *root,
                        break;
 
                if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
-                       add_new_free_space(block_group, root->fs_info, last,
-                                          key.objectid);
-
+                       total_found += add_new_free_space(block_group,
+                                                         fs_info, last,
+                                                         key.objectid);
                        last = key.objectid + key.offset;
                }
+
+               if (total_found > (1024 * 1024 * 2)) {
+                       total_found = 0;
+                       wake_up(&block_group->caching_q);
+               }
 next:
                path->slots[0]++;
        }
+       ret = 0;
 
-       add_new_free_space(block_group, root->fs_info, last,
-                          block_group->key.objectid +
-                          block_group->key.offset);
+       total_found += add_new_free_space(block_group, fs_info, last,
+                                         block_group->key.objectid +
+                                         block_group->key.offset);
+
+       spin_lock(&block_group->lock);
+       block_group->cached = BTRFS_CACHE_FINISHED;
+       spin_unlock(&block_group->lock);
 
-       block_group->cached = 1;
-       remove_sb_from_cache(root, block_group);
-       ret = 0;
 err:
        btrfs_free_path(path);
+       up_read(&fs_info->extent_root->commit_root_sem);
+       atomic_dec(&block_group->space_info->caching_threads);
+       wake_up(&block_group->caching_q);
+
+       return 0;
+}
+
+static int cache_block_group(struct btrfs_block_group_cache *cache)
+{
+       struct task_struct *tsk;
+       int ret = 0;
+
+       spin_lock(&cache->lock);
+       if (cache->cached != BTRFS_CACHE_NO) {
+               spin_unlock(&cache->lock);
+               return ret;
+       }
+       cache->cached = BTRFS_CACHE_STARTED;
+       spin_unlock(&cache->lock);
+
+       tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
+                         cache->key.objectid);
+       if (IS_ERR(tsk)) {
+               ret = PTR_ERR(tsk);
+               printk(KERN_ERR "error running thread %d\n", ret);
+               BUG();
+       }
+
        return ret;
 }
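cache_block_group() claims the NO-to-STARTED transition under the spinlock, so at most one caching thread is ever spawned per block group; the thread later publishes FINISHED and wakes caching_q. A pthread sketch of that handshake, with a condition variable standing in for the kernel wait queue (the extent-tree scan itself is elided):

    #include <pthread.h>
    #include <stdio.h>

    enum { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  caching_q = PTHREAD_COND_INITIALIZER;
    static int cached = CACHE_NO;

    static void *caching_thread(void *arg)
    {
            (void)arg;
            /* ... scan the extent tree, add free space ... */
            pthread_mutex_lock(&lock);
            cached = CACHE_FINISHED;           /* publish completion */
            pthread_cond_broadcast(&caching_q);
            pthread_mutex_unlock(&lock);
            return NULL;
    }

    static void cache_block_group(void)
    {
            pthread_t tsk;
            pthread_mutex_lock(&lock);
            if (cached != CACHE_NO) {          /* already started/finished */
                    pthread_mutex_unlock(&lock);
                    return;
            }
            cached = CACHE_STARTED;            /* claim the transition */
            pthread_mutex_unlock(&lock);
            pthread_create(&tsk, NULL, caching_thread, NULL);
            pthread_detach(tsk);
    }

    int main(void)
    {
            cache_block_group();
            cache_block_group();               /* second call is a no-op */
            pthread_mutex_lock(&lock);
            while (cached != CACHE_FINISHED)   /* wait_event() analogue */
                    pthread_cond_wait(&caching_q, &lock);
            pthread_mutex_unlock(&lock);
            puts("cached");
            return 0;
    }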
 
@@ -2387,13 +2476,29 @@ fail:
 
 }
 
+static struct btrfs_block_group_cache *
+next_block_group(struct btrfs_root *root,
+                struct btrfs_block_group_cache *cache)
+{
+       struct rb_node *node;
+       spin_lock(&root->fs_info->block_group_cache_lock);
+       node = rb_next(&cache->cache_node);
+       btrfs_put_block_group(cache);
+       if (node) {
+               cache = rb_entry(node, struct btrfs_block_group_cache,
+                                cache_node);
+               atomic_inc(&cache->count);
+       } else
+               cache = NULL;
+       spin_unlock(&root->fs_info->block_group_cache_lock);
+       return cache;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
-       struct btrfs_block_group_cache *cache, *entry;
-       struct rb_node *n;
+       struct btrfs_block_group_cache *cache;
        int err = 0;
-       int werr = 0;
        struct btrfs_path *path;
        u64 last = 0;
 
@@ -2402,39 +2507,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        while (1) {
-               cache = NULL;
-               spin_lock(&root->fs_info->block_group_cache_lock);
-               for (n = rb_first(&root->fs_info->block_group_cache_tree);
-                    n; n = rb_next(n)) {
-                       entry = rb_entry(n, struct btrfs_block_group_cache,
-                                        cache_node);
-                       if (entry->dirty) {
-                               cache = entry;
-                               break;
-                       }
+               if (last == 0) {
+                       err = btrfs_run_delayed_refs(trans, root,
+                                                    (unsigned long)-1);
+                       BUG_ON(err);
                }
-               spin_unlock(&root->fs_info->block_group_cache_lock);
 
-               if (!cache)
-                       break;
+               cache = btrfs_lookup_first_block_group(root->fs_info, last);
+               while (cache) {
+                       if (cache->dirty)
+                               break;
+                       cache = next_block_group(root, cache);
+               }
+               if (!cache) {
+                       if (last == 0)
+                               break;
+                       last = 0;
+                       continue;
+               }
 
                cache->dirty = 0;
-               last += cache->key.offset;
+               last = cache->key.objectid + cache->key.offset;
 
-               err = write_one_cache_group(trans, root,
-                                           path, cache);
-               /*
-                * if we fail to write the cache group, we want
-                * to keep it marked dirty in hopes that a later
-                * write will work
-                */
-               if (err) {
-                       werr = err;
-                       continue;
-               }
+               err = write_one_cache_group(trans, root, path, cache);
+               BUG_ON(err);
+               btrfs_put_block_group(cache);
        }
+
        btrfs_free_path(path);
-       return werr;
+       return 0;
 }
 
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -2484,6 +2585,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->force_alloc = 0;
        *space_info = found;
        list_add_rcu(&found->list, &info->space_info);
+       atomic_set(&found->caching_threads, 0);
        return 0;
 }
 
@@ -2947,13 +3049,9 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
        struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *fs_info = root->fs_info;
 
-       if (pin) {
+       if (pin)
                set_extent_dirty(&fs_info->pinned_extents,
                                bytenr, bytenr + num - 1, GFP_NOFS);
-       } else {
-               clear_extent_dirty(&fs_info->pinned_extents,
-                               bytenr, bytenr + num - 1, GFP_NOFS);
-       }
 
        while (num > 0) {
                cache = btrfs_lookup_block_group(fs_info, bytenr);
@@ -2969,14 +3067,34 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
                        spin_unlock(&cache->space_info->lock);
                        fs_info->total_pinned += len;
                } else {
+                       int unpin = 0;
+
+                       /*
+                        * in order to not race with the block group caching, we
+                        * only want to unpin the extent if we are cached.  If
+                        * we aren't cached, we want to start async caching this
+                        * block group so we can free the extent the next time
+                        * around.
+                        */
                        spin_lock(&cache->space_info->lock);
                        spin_lock(&cache->lock);
-                       cache->pinned -= len;
-                       cache->space_info->bytes_pinned -= len;
+                       unpin = (cache->cached == BTRFS_CACHE_FINISHED);
+                       if (likely(unpin)) {
+                               cache->pinned -= len;
+                               cache->space_info->bytes_pinned -= len;
+                               fs_info->total_pinned -= len;
+                       }
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
-                       fs_info->total_pinned -= len;
-                       if (cache->cached)
+
+                       if (likely(unpin))
+                               clear_extent_dirty(&fs_info->pinned_extents,
+                                                  bytenr, bytenr + len -1,
+                                                  GFP_NOFS);
+                       else
+                               cache_block_group(cache);
+
+                       if (unpin)
                                btrfs_add_free_space(cache, bytenr, len);
                }
                btrfs_put_block_group(cache);
@@ -3030,6 +3148,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
                                            &start, &end, EXTENT_DIRTY);
                if (ret)
                        break;
+
                set_extent_dirty(copy, start, end, GFP_NOFS);
                last = end + 1;
        }
@@ -3058,6 +3177,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
                cond_resched();
        }
+
        return ret;
 }
 
@@ -3435,6 +3555,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val)
        return ret;
 }
 
+/*
+ * when we wait for progress in the block group caching, it's because
+ * our allocation attempt failed at least once.  So, we must sleep
+ * and let some progress happen before we try again.
+ *
+ * This function will sleep at least once waiting for new free space to
+ * show up, and then it will check the block group free space numbers
+ * for our min num_bytes.  Another option is to have it go ahead
+ * and look in the rbtree for a free extent of a given size, but this
+ * is a good start.
+ */
+static noinline int
+wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
+                               u64 num_bytes)
+{
+       DEFINE_WAIT(wait);
+
+       prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
+
+       if (block_group_cache_done(cache)) {
+               finish_wait(&cache->caching_q, &wait);
+               return 0;
+       }
+       schedule();
+       finish_wait(&cache->caching_q, &wait);
+
+       wait_event(cache->caching_q, block_group_cache_done(cache) ||
+                  (cache->free_space >= num_bytes));
+       return 0;
+}
+
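wait_block_group_cache_progress() deliberately sleeps once before re-checking, so a failed allocation cannot busy-spin on the wait queue, and then waits for either completion or enough new free space. Its compound predicate, reduced to a compilable condition-variable sketch (the real free_space and done updates come from the caching thread):

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  caching_q = PTHREAD_COND_INITIALIZER;
    static int done;                        /* caching finished */
    static unsigned long long free_space;   /* bytes found so far */

    /* block until caching finishes or enough free space has shown up;
     * the caching thread broadcasts after every ~2MB of progress */
    static void wait_cache_progress(unsigned long long num_bytes)
    {
            pthread_mutex_lock(&lock);
            while (!done && free_space < num_bytes)
                    pthread_cond_wait(&caching_q, &lock);
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            done = 1;                       /* pretend caching completed */
            wait_cache_progress(1 << 20);   /* returns immediately */
            return 0;
    }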
+enum btrfs_loop_type {
+       LOOP_CACHED_ONLY = 0,
+       LOOP_CACHING_NOWAIT = 1,
+       LOOP_CACHING_WAIT = 2,
+       LOOP_ALLOC_CHUNK = 3,
+       LOOP_NO_EMPTY_SIZE = 4,
+};
+
 /*
  * walks the btree of allocated extents and find a hole of a given size.
  * The key ins is changed to record the hole:
@@ -3460,6 +3619,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        struct btrfs_space_info *space_info;
        int last_ptr_loop = 0;
        int loop = 0;
+       bool found_uncached_bg = false;
 
        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3491,15 +3651,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        search_start = max(search_start, first_logical_byte(root, 0));
        search_start = max(search_start, hint_byte);
 
-       if (!last_ptr) {
+       if (!last_ptr)
                empty_cluster = 0;
-               loop = 1;
-       }
 
        if (search_start == hint_byte) {
                block_group = btrfs_lookup_block_group(root->fs_info,
                                                       search_start);
-               if (block_group && block_group_bits(block_group, data)) {
+               /*
+                * we don't want to use the block group if it doesn't match our
+                * allocation bits, or if it's not cached.
+                */
+               if (block_group && block_group_bits(block_group, data) &&
+                   block_group_cache_done(block_group)) {
                        down_read(&space_info->groups_sem);
                        if (list_empty(&block_group->list) ||
                            block_group->ro) {
@@ -3522,21 +3685,35 @@ search:
        down_read(&space_info->groups_sem);
        list_for_each_entry(block_group, &space_info->block_groups, list) {
                u64 offset;
+               int cached;
 
                atomic_inc(&block_group->count);
                search_start = block_group->key.objectid;
 
 have_block_group:
-               if (unlikely(!block_group->cached)) {
-                       mutex_lock(&block_group->cache_mutex);
-                       ret = cache_block_group(root, block_group);
-                       mutex_unlock(&block_group->cache_mutex);
-                       if (ret) {
-                               btrfs_put_block_group(block_group);
-                               break;
+               if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+                       /*
+                        * we want to start caching kthreads, but not too many
+                        * right off the bat or we'll overwhelm the system;
+                        * only start them if there are fewer than 2 and we're
+                        * in the initial allocation phase.
+                        */
+                       if (loop > LOOP_CACHING_NOWAIT ||
+                           atomic_read(&space_info->caching_threads) < 2) {
+                               ret = cache_block_group(block_group);
+                               BUG_ON(ret);
                        }
                }
 
+               cached = block_group_cache_done(block_group);
+               if (unlikely(!cached)) {
+                       found_uncached_bg = true;
+
+                       /* if we only want cached bgs, loop */
+                       if (loop == LOOP_CACHED_ONLY)
+                               goto loop;
+               }
+
                if (unlikely(block_group->ro))
                        goto loop;
 
@@ -3615,14 +3792,21 @@ refill_cluster:
                                        spin_unlock(&last_ptr->refill_lock);
                                        goto checks;
                                }
+                       } else if (!cached && loop > LOOP_CACHING_NOWAIT) {
+                               spin_unlock(&last_ptr->refill_lock);
+
+                               wait_block_group_cache_progress(block_group,
+                                      num_bytes + empty_cluster + empty_size);
+                               goto have_block_group;
                        }
+
                        /*
                         * at this point we either didn't find a cluster
                         * or we weren't able to allocate a block from our
                         * cluster.  Free the cluster we've been trying
                         * to use, and go to the next block group
                         */
-                       if (loop < 2) {
+                       if (loop < LOOP_NO_EMPTY_SIZE) {
                                btrfs_return_cluster_to_free_space(NULL,
                                                                   last_ptr);
                                spin_unlock(&last_ptr->refill_lock);
@@ -3633,11 +3817,17 @@ refill_cluster:
 
                offset = btrfs_find_space_for_alloc(block_group, search_start,
                                                    num_bytes, empty_size);
-               if (!offset)
+               if (!offset && (cached || (!cached &&
+                                          loop == LOOP_CACHING_NOWAIT))) {
                        goto loop;
+               } else if (!offset && (!cached &&
+                                      loop > LOOP_CACHING_NOWAIT)) {
+                       wait_block_group_cache_progress(block_group,
+                                       num_bytes + empty_size);
+                       goto have_block_group;
+               }
 checks:
                search_start = stripe_align(root, offset);
-
                /* move on to the next group */
                if (search_start + num_bytes >= search_end) {
                        btrfs_add_free_space(block_group, offset, num_bytes);
@@ -3683,13 +3873,26 @@ loop:
        }
        up_read(&space_info->groups_sem);
 
-       /* loop == 0, try to find a clustered alloc in every block group
-        * loop == 1, try again after forcing a chunk allocation
-        * loop == 2, set empty_size and empty_cluster to 0 and try again
+       /* LOOP_CACHED_ONLY, only search fully cached block groups
+        * LOOP_CACHING_NOWAIT, search partially cached block groups, but
+        *                      don't wait for them to finish caching
+        * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
+        * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
+        * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
+        *                      again
         */
-       if (!ins->objectid && loop < 3 &&
-           (empty_size || empty_cluster || allowed_chunk_alloc)) {
-               if (loop >= 2) {
+       if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
+           (found_uncached_bg || empty_size || empty_cluster ||
+            allowed_chunk_alloc)) {
+               if (found_uncached_bg) {
+                       found_uncached_bg = false;
+                       if (loop < LOOP_CACHING_WAIT) {
+                               loop++;
+                               goto search;
+                       }
+               }
+
+               if (loop == LOOP_ALLOC_CHUNK) {
                        empty_size = 0;
                        empty_cluster = 0;
                }
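With the named LOOP_* levels in place, find_free_extent()'s retry policy reads as an explicit escalation ladder rather than magic integers. A trivial sketch of that control flow with a stubbed allocation attempt:

    #include <stdio.h>

    enum loop_type {        /* mirrors enum btrfs_loop_type */
            LOOP_CACHED_ONLY,
            LOOP_CACHING_NOWAIT,
            LOOP_CACHING_WAIT,
            LOOP_ALLOC_CHUNK,
            LOOP_NO_EMPTY_SIZE,
    };

    static int try_alloc(int loop)  /* stub: succeed only at the last rung */
    {
            return loop == LOOP_NO_EMPTY_SIZE;
    }

    int main(void)
    {
            for (int loop = LOOP_CACHED_ONLY;
                 loop <= LOOP_NO_EMPTY_SIZE; loop++) {
                    /* LOOP_ALLOC_CHUNK would force a chunk allocation here;
                     * LOOP_NO_EMPTY_SIZE would zero empty_size/empty_cluster */
                    if (try_alloc(loop)) {
                            printf("allocated at level %d\n", loop);
                            break;
                    }
            }
            return 0;
    }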
@@ -3702,7 +3905,7 @@ loop:
                        space_info->force_alloc = 1;
                }
 
-               if (loop < 3) {
+               if (loop < LOOP_NO_EMPTY_SIZE) {
                        loop++;
                        goto search;
                }
@@ -3798,7 +4001,7 @@ again:
                               num_bytes, data, 1);
                goto again;
        }
-       if (ret) {
+       if (ret == -ENOSPC) {
                struct btrfs_space_info *sinfo;
 
                sinfo = __find_space_info(root->fs_info, data);
@@ -3806,7 +4009,6 @@ again:
                       "wanted %llu\n", (unsigned long long)data,
                       (unsigned long long)num_bytes);
                dump_space_info(sinfo, num_bytes);
-               BUG();
        }
 
        return ret;
@@ -3844,7 +4046,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
        ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
                                     empty_size, hint_byte, search_end, ins,
                                     data);
-       update_reserved_extents(root, ins->objectid, ins->offset, 1);
+       if (!ret)
+               update_reserved_extents(root, ins->objectid, ins->offset, 1);
+
        return ret;
 }
 
@@ -4006,9 +4210,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        struct btrfs_block_group_cache *block_group;
 
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-       mutex_lock(&block_group->cache_mutex);
-       cache_block_group(root, block_group);
-       mutex_unlock(&block_group->cache_mutex);
+       cache_block_group(block_group);
+       wait_event(block_group->caching_q,
+                  block_group_cache_done(block_group));
 
        ret = btrfs_remove_free_space(block_group, ins->objectid,
                                      ins->offset);
@@ -4039,7 +4243,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
        ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
                                     empty_size, hint_byte, search_end,
                                     ins, 0);
-       BUG_ON(ret);
+       if (ret)
+               return ret;
 
        if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
                if (parent == 0)
@@ -6955,11 +7160,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                         &info->block_group_cache_tree);
                spin_unlock(&info->block_group_cache_lock);
 
-               btrfs_remove_free_space_cache(block_group);
                down_write(&block_group->space_info->groups_sem);
                list_del(&block_group->list);
                up_write(&block_group->space_info->groups_sem);
 
+               if (block_group->cached == BTRFS_CACHE_STARTED)
+                       wait_event(block_group->caching_q,
+                                  block_group_cache_done(block_group));
+
+               btrfs_remove_free_space_cache(block_group);
+
                WARN_ON(atomic_read(&block_group->count) != 1);
                kfree(block_group);
 
@@ -7025,9 +7235,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                atomic_set(&cache->count, 1);
                spin_lock_init(&cache->lock);
                spin_lock_init(&cache->tree_lock);
-               mutex_init(&cache->cache_mutex);
+               cache->fs_info = info;
+               init_waitqueue_head(&cache->caching_q);
                INIT_LIST_HEAD(&cache->list);
                INIT_LIST_HEAD(&cache->cluster_list);
+
+               /*
+                * we only want to have 32k of ram per block group for keeping
+                * track of free space, and if we pass 1/2 of that we want to
+                * start converting things over to using bitmaps
+                */
+               cache->extents_thresh = ((1024 * 32) / 2) /
+                       sizeof(struct btrfs_free_space);
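The threshold above works out to 16 KiB worth of extent entries per block group. A quick check of the arithmetic (the struct layout below is an assumption sized like the new struct btrfs_free_space, not the kernel's exact definition):

    #include <stdio.h>
    #include <stdint.h>

    /* assumed layout, roughly matching the new struct btrfs_free_space */
    struct free_space {
            void *rb_parent, *rb_left, *rb_right;  /* struct rb_node */
            uint64_t offset;
            uint64_t bytes;
            unsigned long *bitmap;
    };

    int main(void)
    {
            /* 32k of RAM per block group; convert past 1/2 of it */
            int extents_thresh = ((1024 * 32) / 2) / sizeof(struct free_space);
            printf("%d entries of %zu bytes\n",
                   extents_thresh, sizeof(struct free_space));
            /* ~341 entries on a 64-bit build */
            return 0;
    }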
+
                read_extent_buffer(leaf, &cache->item,
                                   btrfs_item_ptr_offset(leaf, path->slots[0]),
                                   sizeof(cache->item));
@@ -7036,6 +7256,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                key.objectid = found_key.objectid + found_key.offset;
                btrfs_release_path(root, path);
                cache->flags = btrfs_block_group_flags(&cache->item);
+               cache->sectorsize = root->sectorsize;
+
+               remove_sb_from_cache(root, cache);
+
+               /*
+                * check for two cases, either we are full, and therefore
+                * don't need to bother with the caching work since we won't
+                * find any space, or we are empty, and we can just add all
+                * the space in and be done with it.  This saves us a lot of
+                * time, particularly in the full case.
+                */
+               if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+                       cache->cached = BTRFS_CACHE_FINISHED;
+               } else if (btrfs_block_group_used(&cache->item) == 0) {
+                       cache->cached = BTRFS_CACHE_FINISHED;
+                       add_new_free_space(cache, root->fs_info,
+                                          found_key.objectid,
+                                          found_key.objectid +
+                                          found_key.offset);
+               }
 
                ret = update_space_info(info, cache->flags, found_key.offset,
                                        btrfs_block_group_used(&cache->item),
@@ -7079,10 +7319,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->key.objectid = chunk_offset;
        cache->key.offset = size;
        cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+       cache->sectorsize = root->sectorsize;
+
+       /*
+        * we only want to have 32k of ram per block group for keeping track
+        * of free space, and if we pass 1/2 of that we want to start
+        * converting things over to using bitmaps
+        */
+       cache->extents_thresh = ((1024 * 32) / 2) /
+               sizeof(struct btrfs_free_space);
        atomic_set(&cache->count, 1);
        spin_lock_init(&cache->lock);
        spin_lock_init(&cache->tree_lock);
-       mutex_init(&cache->cache_mutex);
+       init_waitqueue_head(&cache->caching_q);
        INIT_LIST_HEAD(&cache->list);
        INIT_LIST_HEAD(&cache->cluster_list);
 
@@ -7091,6 +7340,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->flags = type;
        btrfs_set_block_group_flags(&cache->item, type);
 
+       cache->cached = BTRFS_CACHE_FINISHED;
+       remove_sb_from_cache(root, cache);
+
+       add_new_free_space(cache, root->fs_info, chunk_offset,
+                          chunk_offset + size);
+
        ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
                                &cache->space_info);
        BUG_ON(ret);
@@ -7149,7 +7404,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        rb_erase(&block_group->cache_node,
                 &root->fs_info->block_group_cache_tree);
        spin_unlock(&root->fs_info->block_group_cache_lock);
-       btrfs_remove_free_space_cache(block_group);
+
        down_write(&block_group->space_info->groups_sem);
        /*
         * we must use list_del_init so people can check to see if they
@@ -7158,11 +7413,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        list_del_init(&block_group->list);
        up_write(&block_group->space_info->groups_sem);
 
+       if (block_group->cached == BTRFS_CACHE_STARTED)
+               wait_event(block_group->caching_q,
+                          block_group_cache_done(block_group));
+
+       btrfs_remove_free_space_cache(block_group);
+
        spin_lock(&block_group->space_info->lock);
        block_group->space_info->total_bytes -= block_group->key.offset;
        block_group->space_info->bytes_readonly -= block_group->key.offset;
        spin_unlock(&block_group->space_info->lock);
-       block_group->space_info->full = 0;
+
+       btrfs_clear_space_info_full(root->fs_info);
 
        btrfs_put_block_group(block_group);
        btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4538e48581a5171b33af2d3d2c632dbff4e412e2..af99b78b288e8c5e8a14e44b27b3005e5676c26c 100644
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/pagemap.h>
 #include <linux/sched.h>
+#include <linux/math64.h>
 #include "ctree.h"
 #include "free-space-cache.h"
 #include "transaction.h"
 
-struct btrfs_free_space {
-       struct rb_node bytes_index;
-       struct rb_node offset_index;
-       u64 offset;
-       u64 bytes;
-};
+#define BITS_PER_BITMAP                (PAGE_CACHE_SIZE * 8)
+#define MAX_CACHE_BYTES_PER_GIG        (32 * 1024)
 
-static int tree_insert_offset(struct rb_root *root, u64 offset,
-                             struct rb_node *node)
+static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
+                                         u64 offset)
 {
-       struct rb_node **p = &root->rb_node;
-       struct rb_node *parent = NULL;
-       struct btrfs_free_space *info;
+       BUG_ON(offset < bitmap_start);
+       offset -= bitmap_start;
+       return (unsigned long)(div64_u64(offset, sectorsize));
+}
 
-       while (*p) {
-               parent = *p;
-               info = rb_entry(parent, struct btrfs_free_space, offset_index);
+static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
+{
+       return (unsigned long)(div64_u64(bytes, sectorsize));
+}
 
-               if (offset < info->offset)
-                       p = &(*p)->rb_left;
-               else if (offset > info->offset)
-                       p = &(*p)->rb_right;
-               else
-                       return -EEXIST;
-       }
+static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
+                                  u64 offset)
+{
+       u64 bitmap_start;
+       u64 bytes_per_bitmap;
 
-       rb_link_node(node, parent, p);
-       rb_insert_color(node, root);
+       bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
+       bitmap_start = offset - block_group->key.objectid;
+       bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
+       bitmap_start *= bytes_per_bitmap;
+       bitmap_start += block_group->key.objectid;
 
-       return 0;
+       return bitmap_start;
 }
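With 4 KiB sectors, one page-sized bitmap covers BITS_PER_BITMAP * sectorsize = 32768 * 4096 bytes = 128 MiB, and offset_to_bitmap() rounds an offset down to the start of the bitmap covering it. The same arithmetic in plain C, with div64_u64() replaced by ordinary 64-bit division:

    #include <stdint.h>
    #include <stdio.h>

    #define BITS_PER_BITMAP (4096 * 8)      /* one page of bits */

    /* round 'offset' down to the start of its covering bitmap */
    static uint64_t offset_to_bitmap(uint64_t bg_start, uint64_t sectorsize,
                                     uint64_t offset)
    {
            uint64_t bytes_per_bitmap = BITS_PER_BITMAP * sectorsize;
            uint64_t nr = (offset - bg_start) / bytes_per_bitmap;
            return nr * bytes_per_bitmap + bg_start;
    }

    /* which bit inside that bitmap represents 'offset' */
    static unsigned long offset_to_bit(uint64_t bitmap_start,
                                       uint64_t sectorsize, uint64_t offset)
    {
            return (unsigned long)((offset - bitmap_start) / sectorsize);
    }

    int main(void)
    {
            uint64_t bg = 1024ULL << 20;            /* block group at 1 GiB */
            uint64_t off = bg + (200ULL << 20);     /* free space at +200 MiB */
            uint64_t start = offset_to_bitmap(bg, 4096, off);
            printf("bitmap at %llu MiB, bit %lu\n",
                   (unsigned long long)(start >> 20),
                   offset_to_bit(start, 4096, off)); /* 1152 MiB, bit 18432 */
            return 0;
    }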
 
-static int tree_insert_bytes(struct rb_root *root, u64 bytes,
-                            struct rb_node *node)
+static int tree_insert_offset(struct rb_root *root, u64 offset,
+                             struct rb_node *node, int bitmap)
 {
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
@@ -62,12 +63,34 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes,
 
        while (*p) {
                parent = *p;
-               info = rb_entry(parent, struct btrfs_free_space, bytes_index);
+               info = rb_entry(parent, struct btrfs_free_space, offset_index);
 
-               if (bytes < info->bytes)
+               if (offset < info->offset) {
                        p = &(*p)->rb_left;
-               else
+               } else if (offset > info->offset) {
                        p = &(*p)->rb_right;
+               } else {
+                       /*
+                        * we could have a bitmap entry and an extent entry
+                        * share the same offset.  If this is the case, we want
+                        * the extent entry to always be found first if we do a
+                        * linear search through the tree, since we want to have
+                        * the quickest allocation time, and allocating from an
+                        * extent is faster than allocating from a bitmap.  So
+                        * if we're inserting a bitmap and we find an entry at
+                        * this offset, we want to go right, or after this entry
+                        * logically.  If we are inserting an extent and we've
+                        * found a bitmap, we want to go left, or before
+                        * logically.
+                        */
+                       if (bitmap) {
+                               WARN_ON(info->bitmap);
+                               p = &(*p)->rb_right;
+                       } else {
+                               WARN_ON(!info->bitmap);
+                               p = &(*p)->rb_left;
+                       }
+               }
        }
 
        rb_link_node(node, parent, p);
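The tie-break above keeps an extent entry strictly to the left of a bitmap entry sharing its offset, so a linear walk of the tree tries the cheaper extent allocation first. The decision reduced to a standalone comparator (entry layout invented for the sketch):

    #include <stdio.h>

    struct entry { unsigned long long offset; int is_bitmap; };

    /* <0: insert left of 'old', >0: insert right; same rule as the walk */
    static int insert_cmp(const struct entry *new, const struct entry *old)
    {
            if (new->offset < old->offset)
                    return -1;
            if (new->offset > old->offset)
                    return 1;
            /* equal offsets: bitmaps sort after extents */
            return new->is_bitmap ? 1 : -1;
    }

    int main(void)
    {
            struct entry ext = { 8192, 0 }, bmp = { 8192, 1 };
            printf("%d %d\n", insert_cmp(&bmp, &ext), insert_cmp(&ext, &bmp));
            /* "1 -1": the bitmap goes right, the extent goes left */
            return 0;
    }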
@@ -79,110 +102,143 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes,
 /*
  * searches the tree for the given offset.
  *
- * fuzzy == 1: this is used for allocations where we are given a hint of where
- * to look for free space.  Because the hint may not be completely on an offset
- * mark, or the hint may no longer point to free space we need to fudge our
- * results a bit.  So we look for free space starting at or after offset with at
- * least bytes size.  We prefer to find as close to the given offset as we can.
- * Also if the offset is within a free space range, then we will return the free
- * space that contains the given offset, which means we can return a free space
- * chunk with an offset before the provided offset.
- *
- * fuzzy == 0: this is just a normal tree search.  Give us the free space that
- * starts at the given offset which is at least bytes size, and if its not there
- * return NULL.
+ * fuzzy - If this is set, then we are trying to make an allocation, and we just
+ * want a section that has at least bytes size and comes at or after the given
+ * offset.
  */
-static struct btrfs_free_space *tree_search_offset(struct rb_root *root,
-                                                  u64 offset, u64 bytes,
-                                                  int fuzzy)
+static struct btrfs_free_space *
+tree_search_offset(struct btrfs_block_group_cache *block_group,
+                  u64 offset, int bitmap_only, int fuzzy)
 {
-       struct rb_node *n = root->rb_node;
-       struct btrfs_free_space *entry, *ret = NULL;
+       struct rb_node *n = block_group->free_space_offset.rb_node;
+       struct btrfs_free_space *entry, *prev = NULL;
+
+       /* find entry that is closest to the 'offset' */
+       while (1) {
+               if (!n) {
+                       entry = NULL;
+                       break;
+               }
 
-       while (n) {
                entry = rb_entry(n, struct btrfs_free_space, offset_index);
+               prev = entry;
 
-               if (offset < entry->offset) {
-                       if (fuzzy &&
-                           (!ret || entry->offset < ret->offset) &&
-                           (bytes <= entry->bytes))
-                               ret = entry;
+               if (offset < entry->offset)
                        n = n->rb_left;
-               } else if (offset > entry->offset) {
-                       if (fuzzy &&
-                           (entry->offset + entry->bytes - 1) >= offset &&
-                           bytes <= entry->bytes) {
-                               ret = entry;
-                               break;
-                       }
+               else if (offset > entry->offset)
                        n = n->rb_right;
-               } else {
-                       if (bytes > entry->bytes) {
-                               n = n->rb_right;
-                               continue;
-                       }
-                       ret = entry;
+               else
                        break;
-               }
        }
 
-       return ret;
-}
-
-/*
- * return a chunk at least bytes size, as close to offset that we can get.
- */
-static struct btrfs_free_space *tree_search_bytes(struct rb_root *root,
-                                                 u64 offset, u64 bytes)
-{
-       struct rb_node *n = root->rb_node;
-       struct btrfs_free_space *entry, *ret = NULL;
+       if (bitmap_only) {
+               if (!entry)
+                       return NULL;
+               if (entry->bitmap)
+                       return entry;
 
-       while (n) {
-               entry = rb_entry(n, struct btrfs_free_space, bytes_index);
+               /*
+                * a bitmap entry and an extent entry may share the same
+                * offset; in that case, the bitmap entry comes after the
+                * extent entry.
+                */
+               n = rb_next(n);
+               if (!n)
+                       return NULL;
+               entry = rb_entry(n, struct btrfs_free_space, offset_index);
+               if (entry->offset != offset)
+                       return NULL;
 
-               if (bytes < entry->bytes) {
+               WARN_ON(!entry->bitmap);
+               return entry;
+       } else if (entry) {
+               if (entry->bitmap) {
                        /*
-                        * We prefer to get a hole size as close to the size we
-                        * are asking for so we don't take small slivers out of
-                        * huge holes, but we also want to get as close to the
-                        * offset as possible so we don't have a whole lot of
-                        * fragmentation.
+                        * if previous extent entry covers the offset,
+                        * we should return it instead of the bitmap entry
                         */
-                       if (offset <= entry->offset) {
-                               if (!ret)
-                                       ret = entry;
-                               else if (entry->bytes < ret->bytes)
-                                       ret = entry;
-                               else if (entry->offset < ret->offset)
-                                       ret = entry;
+                       n = &entry->offset_index;
+                       while (1) {
+                               n = rb_prev(n);
+                               if (!n)
+                                       break;
+                               prev = rb_entry(n, struct btrfs_free_space,
+                                               offset_index);
+                               if (!prev->bitmap) {
+                                       if (prev->offset + prev->bytes > offset)
+                                               entry = prev;
+                                       break;
+                               }
                        }
-                       n = n->rb_left;
-               } else if (bytes > entry->bytes) {
-                       n = n->rb_right;
+               }
+               return entry;
+       }
+
+       if (!prev)
+               return NULL;
+
+       /* find last entry before the 'offset' */
+       entry = prev;
+       if (entry->offset > offset) {
+               n = rb_prev(&entry->offset_index);
+               if (n) {
+                       entry = rb_entry(n, struct btrfs_free_space,
+                                       offset_index);
+                       BUG_ON(entry->offset > offset);
                } else {
-                       /*
-                        * Ok we may have multiple chunks of the wanted size,
-                        * so we don't want to take the first one we find, we
-                        * want to take the one closest to our given offset, so
-                        * keep searching just in case theres a better match.
-                        */
-                       n = n->rb_right;
-                       if (offset > entry->offset)
-                               continue;
-                       else if (!ret || entry->offset < ret->offset)
-                               ret = entry;
+                       if (fuzzy)
+                               return entry;
+                       else
+                               return NULL;
                }
        }
 
-       return ret;
+       if (entry->bitmap) {
+               n = &entry->offset_index;
+               while (1) {
+                       n = rb_prev(n);
+                       if (!n)
+                               break;
+                       prev = rb_entry(n, struct btrfs_free_space,
+                                       offset_index);
+                       if (!prev->bitmap) {
+                               if (prev->offset + prev->bytes > offset)
+                                       return prev;
+                               break;
+                       }
+               }
+               if (entry->offset + BITS_PER_BITMAP *
+                   block_group->sectorsize > offset)
+                       return entry;
+       } else if (entry->offset + entry->bytes > offset)
+               return entry;
+
+       if (!fuzzy)
+               return NULL;
+
+       while (1) {
+               if (entry->bitmap) {
+                       if (entry->offset + BITS_PER_BITMAP *
+                           block_group->sectorsize > offset)
+                               break;
+               } else {
+                       if (entry->offset + entry->bytes > offset)
+                               break;
+               }
+
+               n = rb_next(&entry->offset_index);
+               if (!n)
+                       return NULL;
+               entry = rb_entry(n, struct btrfs_free_space, offset_index);
+       }
+       return entry;
 }
 
 static void unlink_free_space(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info)
 {
        rb_erase(&info->offset_index, &block_group->free_space_offset);
-       rb_erase(&info->bytes_index, &block_group->free_space_bytes);
+       block_group->free_extents--;
+       block_group->free_space -= info->bytes;
 }
 
 static int link_free_space(struct btrfs_block_group_cache *block_group,
@@ -190,17 +246,314 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
 {
        int ret = 0;
 
-
-       BUG_ON(!info->bytes);
+       BUG_ON(!info->bitmap && !info->bytes);
        ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
-                                &info->offset_index);
+                                &info->offset_index, (info->bitmap != NULL));
        if (ret)
                return ret;
 
-       ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes,
-                               &info->bytes_index);
-       if (ret)
-               return ret;
+       block_group->free_space += info->bytes;
+       block_group->free_extents++;
+       return ret;
+}
+
+static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
+{
+       u64 max_bytes, possible_bytes;
+
+       /*
+        * The goal is to keep the total amount of memory used per 1gb of space
+        * at or below 32k, so we need to adjust how much memory we allow to be
+        * used by extent based free space tracking
+        */
+       max_bytes = MAX_CACHE_BYTES_PER_GIG *
+               (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
+
+       possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
+               (sizeof(struct btrfs_free_space) *
+                block_group->extents_thresh);
+
+       if (possible_bytes > max_bytes) {
+               int extent_bytes = max_bytes -
+                       (block_group->total_bitmaps * PAGE_CACHE_SIZE);
+
+               if (extent_bytes <= 0) {
+                       block_group->extents_thresh = 0;
+                       return;
+               }
+
+               block_group->extents_thresh = extent_bytes /
+                       (sizeof(struct btrfs_free_space));
+       }
+}
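
The budget arithmetic above is easy to sanity-check in userspace. A sketch with assumed numbers (32KiB of cache per 1GiB of space, 4KiB pages, a 64-byte entry struct; all three are stand-ins, not the patch's constants):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t bg_size = 8ULL << 30;   /* an 8GiB block group */
            uint64_t total_bitmaps = 2;      /* pages already spent on bitmaps */
            uint64_t max_bytes = 32768 * (bg_size >> 30);  /* 32KiB per GiB */
            uint64_t bitmap_bytes = total_bitmaps * 4096;

            /* whatever the bitmaps don't consume is left for extent entries */
            uint64_t extents_thresh = max_bytes > bitmap_bytes ?
                    (max_bytes - bitmap_bytes) / 64 : 0;
            printf("extent entries allowed: %llu\n",
                   (unsigned long long)extents_thresh);
            return 0;
    }
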
+
+static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
+                             struct btrfs_free_space *info, u64 offset,
+                             u64 bytes)
+{
+       unsigned long start, end;
+       unsigned long i;
+
+       start = offset_to_bit(info->offset, block_group->sectorsize, offset);
+       end = start + bytes_to_bits(bytes, block_group->sectorsize);
+       BUG_ON(end > BITS_PER_BITMAP);
+
+       for (i = start; i < end; i++)
+               clear_bit(i, info->bitmap);
+
+       info->bytes -= bytes;
+       block_group->free_space -= bytes;
+}
+
+static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
+                           struct btrfs_free_space *info, u64 offset,
+                           u64 bytes)
+{
+       unsigned long start, end;
+       unsigned long i;
+
+       start = offset_to_bit(info->offset, block_group->sectorsize, offset);
+       end = start + bytes_to_bits(bytes, block_group->sectorsize);
+       BUG_ON(end > BITS_PER_BITMAP);
+
+       for (i = start; i < end; i++)
+               set_bit(i, info->bitmap);
+
+       info->bytes += bytes;
+       block_group->free_space += bytes;
+}
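
offset_to_bit() and bytes_to_bits() are used throughout these helpers but are not shown in this diff; assuming they are the obvious sector-granularity conversions, hedged reconstructions would look like:

    #include <stdint.h>

    /* hedged reconstructions, not the patch's exact code */
    static unsigned long offset_to_bit(uint64_t bitmap_start, uint64_t sectorsize,
                                       uint64_t offset)
    {
            /* which bit inside this bitmap covers 'offset' */
            return (unsigned long)((offset - bitmap_start) / sectorsize);
    }

    static unsigned long bytes_to_bits(uint64_t bytes, uint64_t sectorsize)
    {
            /* how many bits a byte count spans (sector alignment assumed) */
            return (unsigned long)(bytes / sectorsize);
    }
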
+
+static int search_bitmap(struct btrfs_block_group_cache *block_group,
+                        struct btrfs_free_space *bitmap_info, u64 *offset,
+                        u64 *bytes)
+{
+       unsigned long found_bits = 0;
+       unsigned long bits, i;
+       unsigned long next_zero;
+
+       i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
+                         max_t(u64, *offset, bitmap_info->offset));
+       bits = bytes_to_bits(*bytes, block_group->sectorsize);
+
+       for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
+            i < BITS_PER_BITMAP;
+            i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
+               next_zero = find_next_zero_bit(bitmap_info->bitmap,
+                                              BITS_PER_BITMAP, i);
+               if ((next_zero - i) >= bits) {
+                       found_bits = next_zero - i;
+                       break;
+               }
+               i = next_zero;
+       }
+
+       if (found_bits) {
+               *offset = (u64)(i * block_group->sectorsize) +
+                       bitmap_info->offset;
+               *bytes = (u64)(found_bits) * block_group->sectorsize;
+               return 0;
+       }
+
+       return -1;
+}
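
The scan above alternates find_next_bit() and find_next_zero_bit() to measure each run of set bits until one is long enough. A self-contained userspace equivalent over a single 64-bit word (the helper is a simplified stand-in for the kernel's bitops):

    #include <stdint.h>
    #include <stdio.h>

    #define NBITS 64

    /* first index >= start whose bit matches want_set, or NBITS if none */
    static unsigned find_next(uint64_t map, unsigned start, int want_set)
    {
            for (unsigned i = start; i < NBITS; i++)
                    if (((map >> i) & 1) == (unsigned)want_set)
                            return i;
            return NBITS;
    }

    int main(void)
    {
            uint64_t map = 0x0ff0ULL;   /* one run of 8 set bits at bit 4 */
            unsigned want = 5;          /* need at least 5 contiguous bits */

            for (unsigned i = find_next(map, 0, 1); i < NBITS;
                 i = find_next(map, i + 1, 1)) {
                    unsigned next_zero = find_next(map, i, 0);
                    if (next_zero - i >= want) {
                            printf("run of %u bits at bit %u\n",
                                   next_zero - i, i);
                            break;
                    }
                    i = next_zero;
            }
            return 0;
    }
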
+
+static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
+                                               *block_group, u64 *offset,
+                                               u64 *bytes, int debug)
+{
+       struct btrfs_free_space *entry;
+       struct rb_node *node;
+       int ret;
+
+       if (!block_group->free_space_offset.rb_node)
+               return NULL;
+
+       entry = tree_search_offset(block_group,
+                                  offset_to_bitmap(block_group, *offset),
+                                  0, 1);
+       if (!entry)
+               return NULL;
+
+       for (node = &entry->offset_index; node; node = rb_next(node)) {
+               entry = rb_entry(node, struct btrfs_free_space, offset_index);
+               if (entry->bytes < *bytes)
+                       continue;
+
+               if (entry->bitmap) {
+                       ret = search_bitmap(block_group, entry, offset, bytes);
+                       if (!ret)
+                               return entry;
+                       continue;
+               }
+
+               *offset = entry->offset;
+               *bytes = entry->bytes;
+               return entry;
+       }
+
+       return NULL;
+}
+
+static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
+                          struct btrfs_free_space *info, u64 offset)
+{
+       u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
+       int max_bitmaps = (int)div64_u64(block_group->key.offset +
+                                        bytes_per_bg - 1, bytes_per_bg);
+       BUG_ON(block_group->total_bitmaps >= max_bitmaps);
+
+       info->offset = offset_to_bitmap(block_group, offset);
+       link_free_space(block_group, info);
+       block_group->total_bitmaps++;
+
+       recalculate_thresholds(block_group);
+}
+
+static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
+                             struct btrfs_free_space *bitmap_info,
+                             u64 *offset, u64 *bytes)
+{
+       u64 end;
+
+again:
+       end = bitmap_info->offset +
+               (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
+
+       if (*offset > bitmap_info->offset && *offset + *bytes > end) {
+               bitmap_clear_bits(block_group, bitmap_info, *offset,
+                                 end - *offset + 1);
+               *bytes -= end - *offset + 1;
+               *offset = end + 1;
+       } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
+               bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
+               *bytes = 0;
+       }
+
+       if (*bytes) {
+               if (!bitmap_info->bytes) {
+                       unlink_free_space(block_group, bitmap_info);
+                       kfree(bitmap_info->bitmap);
+                       kfree(bitmap_info);
+                       block_group->total_bitmaps--;
+                       recalculate_thresholds(block_group);
+               }
+
+               bitmap_info = tree_search_offset(block_group,
+                                                offset_to_bitmap(block_group,
+                                                                 *offset),
+                                                1, 0);
+               if (!bitmap_info)
+                       return -EINVAL;
+
+               if (!bitmap_info->bitmap)
+                       return -EAGAIN;
+
+               goto again;
+       } else if (!bitmap_info->bytes) {
+               unlink_free_space(block_group, bitmap_info);
+               kfree(bitmap_info->bitmap);
+               kfree(bitmap_info);
+               block_group->total_bitmaps--;
+               recalculate_thresholds(block_group);
+       }
+
+       return 0;
+}
+
+static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
+                             struct btrfs_free_space *info)
+{
+       struct btrfs_free_space *bitmap_info;
+       int added = 0;
+       u64 bytes, offset, end;
+       int ret;
+
+       /*
+        * If we are below the extents threshold then we can add this as an
+        * extent, and don't have to deal with the bitmap
+        */
+       if (block_group->free_extents < block_group->extents_thresh &&
+           info->bytes > block_group->sectorsize * 4)
+               return 0;
+
+       /*
+        * some block groups are so tiny they can't be enveloped by a bitmap, so
+        * don't even bother to create a bitmap for this
+        */
+       if (BITS_PER_BITMAP * block_group->sectorsize >
+           block_group->key.offset)
+               return 0;
+
+       bytes = info->bytes;
+       offset = info->offset;
+
+again:
+       bitmap_info = tree_search_offset(block_group,
+                                        offset_to_bitmap(block_group, offset),
+                                        1, 0);
+       if (!bitmap_info) {
+               BUG_ON(added);
+               goto new_bitmap;
+       }
+
+       end = bitmap_info->offset +
+               (u64)(BITS_PER_BITMAP * block_group->sectorsize);
+
+       if (offset >= bitmap_info->offset && offset + bytes > end) {
+               bitmap_set_bits(block_group, bitmap_info, offset,
+                               end - offset);
+               bytes -= end - offset;
+               offset = end;
+               added = 0;
+       } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
+               bitmap_set_bits(block_group, bitmap_info, offset, bytes);
+               bytes = 0;
+       } else {
+               BUG();
+       }
+
+       if (!bytes) {
+               ret = 1;
+               goto out;
+       } else
+               goto again;
+
+new_bitmap:
+       if (info && info->bitmap) {
+               add_new_bitmap(block_group, info, offset);
+               added = 1;
+               info = NULL;
+               goto again;
+       } else {
+               spin_unlock(&block_group->tree_lock);
+
+               /* no pre-allocated info, allocate a new one */
+               if (!info) {
+                       info = kzalloc(sizeof(struct btrfs_free_space),
+                                      GFP_NOFS);
+                       if (!info) {
+                               spin_lock(&block_group->tree_lock);
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+               }
+
+               /* allocate the bitmap */
+               info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+               spin_lock(&block_group->tree_lock);
+               if (!info->bitmap) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               goto again;
+       }
+
+out:
+       if (info) {
+               if (info->bitmap)
+                       kfree(info->bitmap);
+               kfree(info);
+       }
 
        return ret;
 }
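
One detail worth noting in insert_into_bitmap(): the GFP_NOFS allocations can sleep, so tree_lock is dropped around them and retaken, and the code jumps back to 'again' because the tree may have changed in the window. The shape of that retry, transposed to userspace locking purely for illustration:

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

    /* allocate without holding the lock, since a sleeping allocation must
     * not happen under a spinlock; the caller must re-validate afterwards */
    static void *alloc_outside_lock(size_t sz)
    {
            void *p;

            pthread_mutex_unlock(&tree_lock);
            p = calloc(1, sz);              /* may block, like GFP_NOFS */
            pthread_mutex_lock(&tree_lock);
            /* state may have changed while unlocked; the patch handles
             * this by jumping back to its 'again:' label to re-search */
            return p;
    }
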
@@ -208,8 +561,8 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
                         u64 offset, u64 bytes)
 {
-       struct btrfs_free_space *right_info;
-       struct btrfs_free_space *left_info;
+       struct btrfs_free_space *right_info = NULL;
+       struct btrfs_free_space *left_info = NULL;
        struct btrfs_free_space *info = NULL;
        int ret = 0;
 
@@ -227,18 +580,38 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
         * are adding, if there is remove that struct and add a new one to
         * cover the entire range
         */
-       right_info = tree_search_offset(&block_group->free_space_offset,
-                                       offset+bytes, 0, 0);
-       left_info = tree_search_offset(&block_group->free_space_offset,
-                                      offset-1, 0, 1);
+       right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
+       if (right_info && rb_prev(&right_info->offset_index))
+               left_info = rb_entry(rb_prev(&right_info->offset_index),
+                                    struct btrfs_free_space, offset_index);
+       else
+               left_info = tree_search_offset(block_group, offset - 1, 0, 0);
 
-       if (right_info) {
+       /*
+        * If there was no extent directly to the left or right of this new
+        * extent then we know we're going to have to allocate a new extent, so
+        * before we do that see if we need to drop this into a bitmap
+        */
+       if ((!left_info || left_info->bitmap) &&
+           (!right_info || right_info->bitmap)) {
+               ret = insert_into_bitmap(block_group, info);
+
+               if (ret < 0) {
+                       goto out;
+               } else if (ret) {
+                       ret = 0;
+                       goto out;
+               }
+       }
+
+       if (right_info && !right_info->bitmap) {
                unlink_free_space(block_group, right_info);
                info->bytes += right_info->bytes;
                kfree(right_info);
        }
 
-       if (left_info && left_info->offset + left_info->bytes == offset) {
+       if (left_info && !left_info->bitmap &&
+           left_info->offset + left_info->bytes == offset) {
                unlink_free_space(block_group, left_info);
                info->offset = left_info->offset;
                info->bytes += left_info->bytes;
@@ -248,11 +621,11 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
        ret = link_free_space(block_group, info);
        if (ret)
                kfree(info);
-
+out:
        spin_unlock(&block_group->tree_lock);
 
        if (ret) {
-               printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret);
+               printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
                BUG_ON(ret == -EEXIST);
        }
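
The hunk above is classic free-list coalescing, now restricted to extent entries: a neighbour found exactly at offset + bytes is absorbed on the right, one ending exactly at offset is absorbed on the left, and bitmap entries are never merged. A minimal sketch of the interval arithmetic (toy types, not the kernel's):

    #include <stdint.h>
    #include <stdio.h>

    struct run { uint64_t offset, bytes; };

    /* absorb neighbours that abut the new range exactly */
    static struct run coalesce(struct run new_run, const struct run *left,
                               const struct run *right)
    {
            if (right && new_run.offset + new_run.bytes == right->offset)
                    new_run.bytes += right->bytes;
            if (left && left->offset + left->bytes == new_run.offset) {
                    new_run.offset = left->offset;
                    new_run.bytes += left->bytes;
            }
            return new_run;
    }

    int main(void)
    {
            struct run left = {0, 4096}, right = {8192, 4096};
            struct run m = coalesce((struct run){4096, 4096}, &left, &right);
            printf("[%llu, +%llu)\n", (unsigned long long)m.offset,
                   (unsigned long long)m.bytes);   /* [0, +12288) */
            return 0;
    }
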
 
@@ -263,40 +636,65 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
                            u64 offset, u64 bytes)
 {
        struct btrfs_free_space *info;
+       struct btrfs_free_space *next_info = NULL;
        int ret = 0;
 
        spin_lock(&block_group->tree_lock);
 
-       info = tree_search_offset(&block_group->free_space_offset, offset, 0,
-                                 1);
-       if (info && info->offset == offset) {
-               if (info->bytes < bytes) {
-                       printk(KERN_ERR "Found free space at %llu, size %llu,"
-                              "trying to use %llu\n",
-                              (unsigned long long)info->offset,
-                              (unsigned long long)info->bytes,
-                              (unsigned long long)bytes);
+again:
+       info = tree_search_offset(block_group, offset, 0, 0);
+       if (!info) {
+               WARN_ON(1);
+               goto out_lock;
+       }
+
+       if (info->bytes < bytes && rb_next(&info->offset_index)) {
+               u64 end;
+               next_info = rb_entry(rb_next(&info->offset_index),
+                                            struct btrfs_free_space,
+                                            offset_index);
+
+               if (next_info->bitmap)
+                       end = next_info->offset + BITS_PER_BITMAP *
+                               block_group->sectorsize - 1;
+               else
+                       end = next_info->offset + next_info->bytes;
+
+               if (next_info->bytes < bytes ||
+                   next_info->offset > offset || offset > end) {
+                       printk(KERN_CRIT "Found free space at %llu, size %llu,"
+                             " trying to use %llu\n",
+                             (unsigned long long)info->offset,
+                             (unsigned long long)info->bytes,
+                             (unsigned long long)bytes);
                        WARN_ON(1);
                        ret = -EINVAL;
-                       spin_unlock(&block_group->tree_lock);
-                       goto out;
+                       goto out_lock;
                }
-               unlink_free_space(block_group, info);
 
-               if (info->bytes == bytes) {
-                       kfree(info);
-                       spin_unlock(&block_group->tree_lock);
-                       goto out;
+               info = next_info;
+       }
+
+       if (info->bytes == bytes) {
+               unlink_free_space(block_group, info);
+               if (info->bitmap) {
+                       kfree(info->bitmap);
+                       block_group->total_bitmaps--;
                }
+               kfree(info);
+               goto out_lock;
+       }
 
+       if (!info->bitmap && info->offset == offset) {
+               unlink_free_space(block_group, info);
                info->offset += bytes;
                info->bytes -= bytes;
+               link_free_space(block_group, info);
+               goto out_lock;
+       }
 
-               ret = link_free_space(block_group, info);
-               spin_unlock(&block_group->tree_lock);
-               BUG_ON(ret);
-       } else if (info && info->offset < offset &&
-                  info->offset + info->bytes >= offset + bytes) {
+       if (!info->bitmap && info->offset <= offset &&
+           info->offset + info->bytes >= offset + bytes) {
                u64 old_start = info->offset;
                /*
                 * we're freeing space in the middle of the info,
@@ -312,7 +710,9 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
                        info->offset = offset + bytes;
                        info->bytes = old_end - info->offset;
                        ret = link_free_space(block_group, info);
-                       BUG_ON(ret);
+                       WARN_ON(ret);
+                       if (ret)
+                               goto out_lock;
                } else {
                        /* the hole we're creating ends at the end
                         * of the info struct, just free the info
@@ -320,32 +720,22 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
                        kfree(info);
                }
                spin_unlock(&block_group->tree_lock);
-               /* step two, insert a new info struct to cover anything
-                * before the hole
+
+               /* step two, insert a new info struct to cover
+                * anything before the hole
                 */
                ret = btrfs_add_free_space(block_group, old_start,
                                           offset - old_start);
-               BUG_ON(ret);
-       } else {
-               spin_unlock(&block_group->tree_lock);
-               if (!info) {
-                       printk(KERN_ERR "couldn't find space %llu to free\n",
-                              (unsigned long long)offset);
-                       printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
-                              block_group->cached,
-                              (unsigned long long)block_group->key.objectid,
-                              (unsigned long long)block_group->key.offset);
-                       btrfs_dump_free_space(block_group, bytes);
-               } else if (info) {
-                       printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
-                              "but wanted offset=%llu bytes=%llu\n",
-                              (unsigned long long)info->offset,
-                              (unsigned long long)info->bytes,
-                              (unsigned long long)offset,
-                              (unsigned long long)bytes);
-               }
-               WARN_ON(1);
+               WARN_ON(ret);
+               goto out;
        }
+
+       ret = remove_from_bitmap(block_group, info, &offset, &bytes);
+       if (ret == -EAGAIN)
+               goto again;
+       BUG_ON(ret);
+out_lock:
+       spin_unlock(&block_group->tree_lock);
 out:
        return ret;
 }
@@ -361,10 +751,13 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
                info = rb_entry(n, struct btrfs_free_space, offset_index);
                if (info->bytes >= bytes)
                        count++;
-               printk(KERN_ERR "entry offset %llu, bytes %llu\n",
+               printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
                       (unsigned long long)info->offset,
-                      (unsigned long long)info->bytes);
+                      (unsigned long long)info->bytes,
+                      (info->bitmap) ? "yes" : "no");
        }
+       printk(KERN_INFO "block group has cluster?: %s\n",
+              list_empty(&block_group->cluster_list) ? "no" : "yes");
        printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
               "\n", count);
 }
@@ -397,26 +790,35 @@ __btrfs_return_cluster_to_free_space(
 {
        struct btrfs_free_space *entry;
        struct rb_node *node;
+       bool bitmap;
 
        spin_lock(&cluster->lock);
        if (cluster->block_group != block_group)
                goto out;
 
+       bitmap = cluster->points_to_bitmap;
+       cluster->block_group = NULL;
        cluster->window_start = 0;
+       list_del_init(&cluster->block_group_list);
+       cluster->points_to_bitmap = false;
+
+       if (bitmap)
+               goto out;
+
        node = rb_first(&cluster->root);
-       while(node) {
+       while (node) {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                node = rb_next(&entry->offset_index);
                rb_erase(&entry->offset_index, &cluster->root);
-               link_free_space(block_group, entry);
+               BUG_ON(entry->bitmap);
+               tree_insert_offset(&block_group->free_space_offset,
+                                  entry->offset, &entry->offset_index, 0);
        }
-       list_del_init(&cluster->block_group_list);
-
-       btrfs_put_block_group(cluster->block_group);
-       cluster->block_group = NULL;
        cluster->root.rb_node = NULL;
+
 out:
        spin_unlock(&cluster->lock);
+       btrfs_put_block_group(block_group);
        return 0;
 }
 
@@ -425,20 +827,28 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
        struct btrfs_free_space *info;
        struct rb_node *node;
        struct btrfs_free_cluster *cluster;
-       struct btrfs_free_cluster *safe;
+       struct list_head *head;
 
        spin_lock(&block_group->tree_lock);
-
-       list_for_each_entry_safe(cluster, safe, &block_group->cluster_list,
-                                block_group_list) {
+       while ((head = block_group->cluster_list.next) !=
+              &block_group->cluster_list) {
+               cluster = list_entry(head, struct btrfs_free_cluster,
+                                    block_group_list);
 
                WARN_ON(cluster->block_group != block_group);
                __btrfs_return_cluster_to_free_space(block_group, cluster);
+               if (need_resched()) {
+                       spin_unlock(&block_group->tree_lock);
+                       cond_resched();
+                       spin_lock(&block_group->tree_lock);
+               }
        }
 
-       while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
-               info = rb_entry(node, struct btrfs_free_space, bytes_index);
+       while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
+               info = rb_entry(node, struct btrfs_free_space, offset_index);
                unlink_free_space(block_group, info);
+               if (info->bitmap)
+                       kfree(info->bitmap);
                kfree(info);
                if (need_resched()) {
                        spin_unlock(&block_group->tree_lock);
@@ -446,6 +856,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
                        spin_lock(&block_group->tree_lock);
                }
        }
+
        spin_unlock(&block_group->tree_lock);
 }
 
@@ -453,25 +864,35 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
                               u64 offset, u64 bytes, u64 empty_size)
 {
        struct btrfs_free_space *entry = NULL;
+       u64 bytes_search = bytes + empty_size;
        u64 ret = 0;
 
        spin_lock(&block_group->tree_lock);
-       entry = tree_search_offset(&block_group->free_space_offset, offset,
-                                  bytes + empty_size, 1);
+       entry = find_free_space(block_group, &offset, &bytes_search, 0);
        if (!entry)
-               entry = tree_search_bytes(&block_group->free_space_bytes,
-                                         offset, bytes + empty_size);
-       if (entry) {
+               goto out;
+
+       ret = offset;
+       if (entry->bitmap) {
+               bitmap_clear_bits(block_group, entry, offset, bytes);
+               if (!entry->bytes) {
+                       unlink_free_space(block_group, entry);
+                       kfree(entry->bitmap);
+                       kfree(entry);
+                       block_group->total_bitmaps--;
+                       recalculate_thresholds(block_group);
+               }
+       } else {
                unlink_free_space(block_group, entry);
-               ret = entry->offset;
                entry->offset += bytes;
                entry->bytes -= bytes;
-
                if (!entry->bytes)
                        kfree(entry);
                else
                        link_free_space(block_group, entry);
        }
+
+out:
        spin_unlock(&block_group->tree_lock);
 
        return ret;
@@ -517,6 +938,47 @@ int btrfs_return_cluster_to_free_space(
        return ret;
 }
 
+static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
+                                  struct btrfs_free_cluster *cluster,
+                                  u64 bytes, u64 min_start)
+{
+       struct btrfs_free_space *entry;
+       int err;
+       u64 search_start = cluster->window_start;
+       u64 search_bytes = bytes;
+       u64 ret = 0;
+
+       spin_lock(&block_group->tree_lock);
+       spin_lock(&cluster->lock);
+
+       if (!cluster->points_to_bitmap)
+               goto out;
+
+       if (cluster->block_group != block_group)
+               goto out;
+
+       entry = tree_search_offset(block_group, search_start, 0, 0);
+
+       if (!entry || !entry->bitmap)
+               goto out;
+
+       search_start = min_start;
+       search_bytes = bytes;
+
+       err = search_bitmap(block_group, entry, &search_start,
+                           &search_bytes);
+       if (err)
+               goto out;
+
+       ret = search_start;
+       bitmap_clear_bits(block_group, entry, ret, bytes);
+out:
+       spin_unlock(&cluster->lock);
+       spin_unlock(&block_group->tree_lock);
+
+       return ret;
+}
+
 /*
  * given a cluster, try to allocate 'bytes' from it, returns 0
  * if it couldn't find anything suitably large, or a logical disk offset
@@ -530,6 +992,10 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
        struct rb_node *node;
        u64 ret = 0;
 
+       if (cluster->points_to_bitmap)
+               return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
+                                              min_start);
+
        spin_lock(&cluster->lock);
        if (bytes > cluster->max_size)
                goto out;
@@ -567,9 +1033,73 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
        }
 out:
        spin_unlock(&cluster->lock);
+
        return ret;
 }
 
+static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
+                               struct btrfs_free_space *entry,
+                               struct btrfs_free_cluster *cluster,
+                               u64 offset, u64 bytes, u64 min_bytes)
+{
+       unsigned long next_zero;
+       unsigned long i;
+       unsigned long search_bits;
+       unsigned long total_bits;
+       unsigned long found_bits;
+       unsigned long start = 0;
+       unsigned long total_found = 0;
+       bool found = false;
+
+       i = offset_to_bit(entry->offset, block_group->sectorsize,
+                         max_t(u64, offset, entry->offset));
+       search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
+       total_bits = bytes_to_bits(bytes, block_group->sectorsize);
+
+again:
+       found_bits = 0;
+       for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
+            i < BITS_PER_BITMAP;
+            i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
+               next_zero = find_next_zero_bit(entry->bitmap,
+                                              BITS_PER_BITMAP, i);
+               if (next_zero - i >= search_bits) {
+                       found_bits = next_zero - i;
+                       break;
+               }
+               i = next_zero;
+       }
+
+       if (!found_bits)
+               return -1;
+
+       if (!found) {
+               start = i;
+               found = true;
+       }
+
+       total_found += found_bits;
+
+       if (cluster->max_size < found_bits * block_group->sectorsize)
+               cluster->max_size = found_bits * block_group->sectorsize;
+
+       if (total_found < total_bits) {
+               i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
+               if (i - start > total_bits * 2) {
+                       total_found = 0;
+                       cluster->max_size = 0;
+                       found = false;
+               }
+               goto again;
+       }
+
+       cluster->window_start = start * block_group->sectorsize +
+               entry->offset;
+       cluster->points_to_bitmap = true;
+
+       return 0;
+}
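
The accumulation above keeps adding runs to the window until total_found covers the request, restarting whenever the window spreads past twice the target with too little payoff. A toy walk-through of that accounting under made-up run data (pure arithmetic, not kernel code):

    #include <stdio.h>

    int main(void)
    {
            /* runs of set bits as (start_bit, length) — made-up data */
            struct { unsigned start, len; } runs[] = { {0, 3}, {30, 2}, {40, 9} };
            unsigned target = 8;            /* bits needed for the cluster */
            unsigned total = 0, win_start = 0, found = 0;

            for (unsigned r = 0; r < 3; r++) {
                    if (!found) {
                            win_start = runs[r].start;
                            found = 1;
                    }
                    total += runs[r].len;
                    /* window spread too far for too little payoff: restart,
                     * mirroring the 'i - start > total_bits * 2' check above */
                    if (total < target &&
                        runs[r].start + runs[r].len - win_start > target * 2) {
                            total = 0;
                            found = 0;
                            continue;
                    }
                    if (total >= target) {
                            printf("cluster window starts at bit %u\n",
                                   win_start);   /* prints bit 40 here */
                            break;
                    }
            }
            return 0;
    }
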
+
 /*
  * here we try to find a cluster of blocks in a block group.  The goal
  * is to find at least bytes free and up to empty_size + bytes free.
@@ -587,12 +1117,12 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
        struct btrfs_free_space *entry = NULL;
        struct rb_node *node;
        struct btrfs_free_space *next;
-       struct btrfs_free_space *last;
+       struct btrfs_free_space *last = NULL;
        u64 min_bytes;
        u64 window_start;
        u64 window_free;
        u64 max_extent = 0;
-       int total_retries = 0;
+       bool found_bitmap = false;
        int ret;
 
        /* for metadata, allow allocates with more holes */
@@ -620,30 +1150,79 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
                goto out;
        }
 again:
-       min_bytes = min(min_bytes, bytes + empty_size);
-       entry = tree_search_bytes(&block_group->free_space_bytes,
-                                 offset, min_bytes);
+       entry = tree_search_offset(block_group, offset, found_bitmap, 1);
        if (!entry) {
                ret = -ENOSPC;
                goto out;
        }
+
+       /*
+        * If found_bitmap is true, we exhausted our search for extent entries,
+        * and we just want to search all of the bitmaps that we can find, and
+        * ignore any extent entries we find.
+        */
+       while (entry->bitmap || found_bitmap ||
+              (!entry->bitmap && entry->bytes < min_bytes)) {
+               struct rb_node *node = rb_next(&entry->offset_index);
+
+               if (entry->bitmap && entry->bytes > bytes + empty_size) {
+                       ret = btrfs_bitmap_cluster(block_group, entry, cluster,
+                                                  offset, bytes + empty_size,
+                                                  min_bytes);
+                       if (!ret)
+                               goto got_it;
+               }
+
+               if (!node) {
+                       ret = -ENOSPC;
+                       goto out;
+               }
+               entry = rb_entry(node, struct btrfs_free_space, offset_index);
+       }
+
+       /*
+        * We already searched all the extent entries from the passed-in offset
+        * to the end and didn't find enough space for the cluster, and we also
+        * didn't find any bitmaps that met our criteria, so just exit
+        */
+       if (found_bitmap) {
+               ret = -ENOSPC;
+               goto out;
+       }
+
+       cluster->points_to_bitmap = false;
        window_start = entry->offset;
        window_free = entry->bytes;
        last = entry;
        max_extent = entry->bytes;
 
-       while(1) {
+       while (1) {
                /* our window is just right, let's fill it */
                if (window_free >= bytes + empty_size)
                        break;
 
                node = rb_next(&last->offset_index);
                if (!node) {
+                       if (found_bitmap)
+                               goto again;
                        ret = -ENOSPC;
                        goto out;
                }
                next = rb_entry(node, struct btrfs_free_space, offset_index);
 
+               /*
+                * we found a bitmap, so if this search doesn't result in a
+                * cluster, we know to go and search again for the bitmaps and
+                * start looking for space there
+                */
+               if (next->bitmap) {
+                       if (!found_bitmap)
+                               offset = next->offset;
+                       found_bitmap = true;
+                       last = next;
+                       continue;
+               }
+
                /*
                 * we haven't filled the empty size and the window is
                 * very large.  reset and try again
@@ -655,19 +1234,6 @@ again:
                        window_free = entry->bytes;
                        last = entry;
                        max_extent = 0;
-                       total_retries++;
-                       if (total_retries % 64 == 0) {
-                               if (min_bytes >= (bytes + empty_size)) {
-                                       ret = -ENOSPC;
-                                       goto out;
-                               }
-                               /*
-                                * grow our allocation a bit, we're not having
-                                * much luck
-                                */
-                               min_bytes *= 2;
-                               goto again;
-                       }
                } else {
                        last = next;
                        window_free += next->bytes;
@@ -685,11 +1251,19 @@ again:
         * The cluster includes an rbtree, but only uses the offset index
         * of each free space cache entry.
         */
-       while(1) {
+       while (1) {
                node = rb_next(&entry->offset_index);
-               unlink_free_space(block_group, entry);
+               if (entry->bitmap && node) {
+                       entry = rb_entry(node, struct btrfs_free_space,
+                                        offset_index);
+                       continue;
+               } else if (entry->bitmap && !node) {
+                       break;
+               }
+
+               rb_erase(&entry->offset_index, &block_group->free_space_offset);
                ret = tree_insert_offset(&cluster->root, entry->offset,
-                                        &entry->offset_index);
+                                        &entry->offset_index, 0);
                BUG_ON(ret);
 
                if (!node || entry == last)
@@ -697,8 +1271,10 @@ again:
 
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }
-       ret = 0;
+
        cluster->max_size = max_extent;
+got_it:
+       ret = 0;
        atomic_inc(&block_group->count);
        list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
        cluster->block_group = block_group;
@@ -718,6 +1294,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
        spin_lock_init(&cluster->refill_lock);
        cluster->root.rb_node = NULL;
        cluster->max_size = 0;
+       cluster->points_to_bitmap = false;
        INIT_LIST_HEAD(&cluster->block_group_list);
        cluster->block_group = NULL;
 }
index 266fb876405442841b31b7e8219fff82e339d821..890a8e79011b2cadc6adc545909289ed90707a27 100644 (file)
 #ifndef __BTRFS_FREE_SPACE_CACHE
 #define __BTRFS_FREE_SPACE_CACHE
 
+struct btrfs_free_space {
+       struct rb_node offset_index;
+       u64 offset;
+       u64 bytes;
+       unsigned long *bitmap;
+       struct list_head list;
+};
+
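
The bitmap pointer added here doubles as the type discriminator: when it is NULL the entry is a plain extent and [offset, offset + bytes) is one free run; when it is set, it points at a page of bits whose set bits each mark one free sector starting at the entry's offset, with bytes caching the total free space the bitmap represents. A hedged stand-in showing that convention:

    #include <stddef.h>
    #include <stdint.h>

    /* minimal stand-in for the struct above, to show the convention only */
    struct free_space_entry {
            uint64_t offset;
            uint64_t bytes;         /* run length, or total bytes the bitmap holds */
            unsigned long *bitmap;  /* NULL => plain extent entry */
    };

    static inline int is_bitmap_entry(const struct free_space_entry *e)
    {
            return e->bitmap != NULL;
    }
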
 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
                         u64 bytenr, u64 size);
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
index 791eab19e330b46f93086ac92b7f1b150b5e1513..56fe83fa60c445d365f4339d1aa472cf875490d3 100644 (file)
@@ -2603,8 +2603,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        if (root->ref_cows)
                btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
        path = btrfs_alloc_path();
-       path->reada = -1;
        BUG_ON(!path);
+       path->reada = -1;
 
        /* FIXME, add redo link to tree so we don't leak on crash */
        key.objectid = inode->i_ino;
index 6d6523da0a30e769c56380c37c6e6aca019217bd..0d126be22b636c239d3e7585803e331d3d6c2f5b 100644 (file)
@@ -309,7 +309,7 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
        }
        printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
               (unsigned long long)btrfs_header_bytenr(c),
-              btrfs_header_level(c), nr,
+             level, nr,
               (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
        for (i = 0; i < nr; i++) {
                btrfs_node_key_to_cpu(c, &key, i);
@@ -326,10 +326,10 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
                                        btrfs_level_size(root, level - 1),
                                        btrfs_node_ptr_generation(c, i));
                if (btrfs_is_leaf(next) &&
-                   btrfs_header_level(c) != 1)
+                  level != 1)
                        BUG();
                if (btrfs_header_level(next) !=
-                       btrfs_header_level(c) - 1)
+                      level - 1)
                        BUG();
                btrfs_print_tree(root, next);
                free_extent_buffer(next);
index 008397934778d457f7d90c3fbd03955fab230639..e71264d1c2c93acc3e1f418b6362c231dd61ad5f 100644 (file)
@@ -670,6 +670,8 @@ again:
                        err = ret;
                        goto out;
                }
+               if (ret > 0 && path2->slots[level] > 0)
+                       path2->slots[level]--;
 
                eb = path2->nodes[level];
                WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
@@ -1609,6 +1611,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                BUG_ON(level == 0);
                path->lowest_level = level;
                ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
+               path->lowest_level = 0;
                if (ret < 0) {
                        btrfs_free_path(path);
                        return ret;
index 2dbf1c1f56ee41b73068c54ee7c345a969b7f38e..e51d2bc532f8fa95a0febf2ab4682547f133b746 100644 (file)
@@ -40,6 +40,14 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
        }
 }
 
+static noinline void switch_commit_root(struct btrfs_root *root)
+{
+       down_write(&root->commit_root_sem);
+       free_extent_buffer(root->commit_root);
+       root->commit_root = btrfs_root_node(root);
+       up_write(&root->commit_root_sem);
+}
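
switch_commit_root() wraps the pointer swap in commit_root_sem so a reader of the commit root never observes a half-finished transition. Transposed to userspace primitives purely for illustration (a pthread rwlock standing in for the kernel rwsem):

    #include <pthread.h>

    static pthread_rwlock_t commit_root_sem = PTHREAD_RWLOCK_INITIALIZER;
    static void *commit_root;

    /* the writer swaps the published pointer under the write side ... */
    static void switch_commit_root_sketch(void *new_root)
    {
            pthread_rwlock_wrlock(&commit_root_sem);
            /* the real code also drops its reference on the old root here */
            commit_root = new_root;
            pthread_rwlock_unlock(&commit_root_sem);
    }

    /* ... and readers hold the read side while they use it */
    static void *read_commit_root_sketch(void)
    {
            void *root;

            pthread_rwlock_rdlock(&commit_root_sem);
            root = commit_root;
            pthread_rwlock_unlock(&commit_root_sem);
            return root;
    }
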
+
 /*
  * either allocate a new transaction or hop into the existing one
  */
@@ -444,9 +452,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 
        btrfs_write_dirty_block_groups(trans, root);
 
-       ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-       BUG_ON(ret);
-
        while (1) {
                old_root_bytenr = btrfs_root_bytenr(&root->root_item);
                if (old_root_bytenr == root->node->start)
@@ -457,13 +462,11 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
                                        &root->root_key,
                                        &root->root_item);
                BUG_ON(ret);
-               btrfs_write_dirty_block_groups(trans, root);
 
-               ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+               ret = btrfs_write_dirty_block_groups(trans, root);
                BUG_ON(ret);
        }
-       free_extent_buffer(root->commit_root);
-       root->commit_root = btrfs_root_node(root);
+       switch_commit_root(root);
        return 0;
 }
 
@@ -495,9 +498,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                root = list_entry(next, struct btrfs_root, dirty_list);
 
                update_cowonly_root(trans, root);
-
-               ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-               BUG_ON(ret);
        }
        return 0;
 }
@@ -544,8 +544,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                        btrfs_update_reloc_root(trans, root);
 
                        if (root->commit_root != root->node) {
-                               free_extent_buffer(root->commit_root);
-                               root->commit_root = btrfs_root_node(root);
+                               switch_commit_root(root);
                                btrfs_set_root_node(&root->root_item,
                                                    root->node);
                        }
@@ -943,9 +942,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
                mutex_unlock(&root->fs_info->trans_mutex);
 
-               if (flush_on_commit || snap_pending) {
-                       if (flush_on_commit)
-                               btrfs_start_delalloc_inodes(root);
+               if (flush_on_commit) {
+                       btrfs_start_delalloc_inodes(root);
+                       ret = btrfs_wait_ordered_extents(root, 0);
+                       BUG_ON(ret);
+               } else if (snap_pending) {
                        ret = btrfs_wait_ordered_extents(root, 1);
                        BUG_ON(ret);
                }
@@ -1009,15 +1010,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        btrfs_set_root_node(&root->fs_info->tree_root->root_item,
                            root->fs_info->tree_root->node);
-       free_extent_buffer(root->fs_info->tree_root->commit_root);
-       root->fs_info->tree_root->commit_root =
-                               btrfs_root_node(root->fs_info->tree_root);
+       switch_commit_root(root->fs_info->tree_root);
 
        btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
                            root->fs_info->chunk_root->node);
-       free_extent_buffer(root->fs_info->chunk_root->commit_root);
-       root->fs_info->chunk_root->commit_root =
-                               btrfs_root_node(root->fs_info->chunk_root);
+       switch_commit_root(root->fs_info->chunk_root);
 
        update_super_roots(root);
 
@@ -1057,6 +1054,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        cur_trans->commit_done = 1;
 
        root->fs_info->last_trans_committed = cur_trans->transid;
+
        wake_up(&cur_trans->commit_wait);
 
        put_transaction(cur_trans);
index c13922206d1be59196a8c59c95f9a98bae12be74..d91b0de7c502d13ef51eaeaf667b61cc0230d8a8 100644 (file)
@@ -797,7 +797,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
                return -ENOENT;
 
        inode = read_one_inode(root, key->objectid);
-       BUG_ON(!dir);
+       BUG_ON(!inode);
 
        ref_ptr = btrfs_item_ptr_offset(eb, slot);
        ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
index 3ab80e9cd76767a674e3005185ee99f3afa53433..5dbefd11b4af524f969c21b857067281eab0dc71 100644 (file)
@@ -721,7 +721,8 @@ error:
  */
 static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
                                         struct btrfs_device *device,
-                                        u64 num_bytes, u64 *start)
+                                        u64 num_bytes, u64 *start,
+                                        u64 *max_avail)
 {
        struct btrfs_key key;
        struct btrfs_root *root = device->dev_root;
@@ -758,9 +759,13 @@ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
        ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
        if (ret < 0)
                goto error;
-       ret = btrfs_previous_item(root, path, 0, key.type);
-       if (ret < 0)
-               goto error;
+       if (ret > 0) {
+               ret = btrfs_previous_item(root, path, key.objectid, key.type);
+               if (ret < 0)
+                       goto error;
+               if (ret > 0)
+                       start_found = 1;
+       }
        l = path->nodes[0];
        btrfs_item_key_to_cpu(l, &key, path->slots[0]);
        while (1) {
@@ -803,6 +808,10 @@ no_more_items:
                        if (last_byte < search_start)
                                last_byte = search_start;
                        hole_size = key.offset - last_byte;
+
+                       if (hole_size > *max_avail)
+                               *max_avail = hole_size;
+
                        if (key.offset > last_byte &&
                            hole_size >= num_bytes) {
                                *start = last_byte;
@@ -1621,6 +1630,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
        device->fs_devices->total_rw_bytes += diff;
 
        device->total_bytes = new_size;
+       device->disk_total_bytes = new_size;
        btrfs_clear_space_info_full(device->dev_root->fs_info);
 
        return btrfs_update_device(trans, device);
@@ -2007,7 +2017,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                        goto done;
                if (ret) {
                        ret = 0;
-                       goto done;
+                       break;
                }
 
                l = path->nodes[0];
@@ -2015,7 +2025,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 
                if (key.objectid != device->devid)
-                       goto done;
+                       break;
 
                dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
                length = btrfs_dev_extent_length(l, dev_extent);
@@ -2171,6 +2181,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                             max_chunk_size);
 
 again:
+       max_avail = 0;
        if (!map || map->num_stripes != num_stripes) {
                kfree(map);
                map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -2219,7 +2230,8 @@ again:
 
                if (device->in_fs_metadata && avail >= min_free) {
                        ret = find_free_dev_extent(trans, device,
-                                                  min_free, &dev_offset);
+                                                  min_free, &dev_offset,
+                                                  &max_avail);
                        if (ret == 0) {
                                list_move_tail(&device->dev_alloc_list,
                                               &private_devs);
@@ -2795,26 +2807,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
                }
        }
 
-       for (i = 0; i > nr; i++) {
-               struct btrfs_multi_bio *multi;
-               struct btrfs_bio_stripe *stripe;
-               int ret;
-
-               length = 1;
-               ret = btrfs_map_block(map_tree, WRITE, buf[i],
-                                     &length, &multi, 0);
-               BUG_ON(ret);
-
-               stripe = multi->stripes;
-               for (j = 0; j < multi->num_stripes; j++) {
-                       if (stripe->physical >= physical &&
-                           physical < stripe->physical + length)
-                               break;
-               }
-               BUG_ON(j >= multi->num_stripes);
-               kfree(multi);
-       }
-
        *logical = buf;
        *naddrs = nr;
        *stripe_len = map->stripe_len;