Btrfs: fix leaking block group on balance
Yan Zheng [Thu, 11 Dec 2008 21:30:39 +0000 (16:30 -0500)]
The block group structs are referenced in many different
places, and it's not safe to free them while balancing.  So, those block
group structs were simply leaked instead.

This patch replaces the block group pointer in the inode with the starting byte
offset of the block group and adds reference counting to the block group
struct.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>

fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h

index 1b9ec1a..a8c9693 100644 (file)
@@ -28,11 +28,6 @@ struct btrfs_inode {
        /* which subvolume this inode belongs to */
        struct btrfs_root *root;
 
-       /* the block group preferred for allocations.  This pointer is buggy
-        * and needs to be replaced with a bytenr instead
-        */
-       struct btrfs_block_group_cache *block_group;
-
        /* key used to find this inode on disk.  This is used by the code
         * to read in roots of subvolumes
         */
@@ -115,6 +110,9 @@ struct btrfs_inode {
         */
        u64 index_cnt;
 
+       /* the start of block group preferred for allocations. */
+       u64 block_group;
+
        struct inode vfs_inode;
 };
 
index 5b0c79d..8733081 100644 (file)
@@ -653,6 +653,9 @@ struct btrfs_block_group_cache {
 
        /* for block groups in the same raid type */
        struct list_head list;
+
+       /* usage count */
+       atomic_t count;
 };
 
 struct btrfs_leaf_ref_tree {
@@ -1706,10 +1709,8 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
                                                         btrfs_fs_info *info,
                                                         u64 bytenr);
-struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
-                                                struct btrfs_block_group_cache
-                                                *hint, u64 search_start,
-                                                int data, int owner);
+u64 btrfs_find_block_group(struct btrfs_root *root,
+                          u64 search_start, u64 search_hint, int owner);
 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                                             struct btrfs_root *root,
                                             u32 blocksize, u64 parent,
@@ -1770,6 +1771,7 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
                            u64 owner_objectid);
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root);
+int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_root *root);
 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
@@ -2019,10 +2021,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
 int btrfs_writepages(struct address_space *mapping,
                     struct writeback_control *wbc);
-int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry,
-               struct btrfs_trans_handle *trans, u64 new_dirid,
-               struct btrfs_block_group_cache *block_group);
-
+int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *new_root, struct dentry *dentry,
+                            u64 new_dirid, u64 alloc_hint);
 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
                         size_t size, struct bio *bio, unsigned long bio_flags);
 
index 673ff59..1cc8924 100644 (file)
@@ -53,10 +53,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
                                 btrfs_root *extent_root, int all);
 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root, int all);
-static struct btrfs_block_group_cache *
-__btrfs_find_block_group(struct btrfs_root *root,
-                        struct btrfs_block_group_cache *hint,
-                        u64 search_start, int data, int owner);
 static int pin_down_bytes(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          u64 bytenr, u64 num_bytes, int is_data);
@@ -142,6 +138,8 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
                        break;
                }
        }
+       if (ret)
+               atomic_inc(&ret->count);
        spin_unlock(&info->block_group_cache_lock);
 
        return ret;
@@ -318,6 +316,12 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
        return cache;
 }
 
+static inline void put_block_group(struct btrfs_block_group_cache *cache)
+{
+       if (atomic_dec_and_test(&cache->count))
+               kfree(cache);
+}
+
 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
                                                  u64 flags)
 {
@@ -341,54 +345,16 @@ static u64 div_factor(u64 num, int factor)
        return num;
 }
 
-static struct btrfs_block_group_cache *
-__btrfs_find_block_group(struct btrfs_root *root,
-                        struct btrfs_block_group_cache *hint,
-                        u64 search_start, int data, int owner)
+u64 btrfs_find_block_group(struct btrfs_root *root,
+                          u64 search_start, u64 search_hint, int owner)
 {
        struct btrfs_block_group_cache *cache;
-       struct btrfs_block_group_cache *found_group = NULL;
-       struct btrfs_fs_info *info = root->fs_info;
        u64 used;
-       u64 last = 0;
-       u64 free_check;
+       u64 last = max(search_hint, search_start);
+       u64 group_start = 0;
        int full_search = 0;
-       int factor = 10;
+       int factor = 9;
        int wrapped = 0;
-
-       if (data & BTRFS_BLOCK_GROUP_METADATA)
-               factor = 9;
-
-       if (search_start) {
-               struct btrfs_block_group_cache *shint;
-               shint = btrfs_lookup_first_block_group(info, search_start);
-               if (shint && block_group_bits(shint, data)) {
-                       spin_lock(&shint->lock);
-                       used = btrfs_block_group_used(&shint->item);
-                       if (used + shint->pinned + shint->reserved <
-                           div_factor(shint->key.offset, factor)) {
-                               spin_unlock(&shint->lock);
-                               return shint;
-                       }
-                       spin_unlock(&shint->lock);
-               }
-       }
-       if (hint && block_group_bits(hint, data)) {
-               spin_lock(&hint->lock);
-               used = btrfs_block_group_used(&hint->item);
-               if (used + hint->pinned + hint->reserved <
-                   div_factor(hint->key.offset, factor)) {
-                       spin_unlock(&hint->lock);
-                       return hint;
-               }
-               spin_unlock(&hint->lock);
-               last = hint->key.objectid + hint->key.offset;
-       } else {
-               if (hint)
-                       last = max(hint->key.objectid, search_start);
-               else
-                       last = search_start;
-       }
 again:
        while (1) {
                cache = btrfs_lookup_first_block_group(root->fs_info, last);
@@ -399,16 +365,18 @@ again:
                last = cache->key.objectid + cache->key.offset;
                used = btrfs_block_group_used(&cache->item);
 
-               if (block_group_bits(cache, data)) {
-                       free_check = div_factor(cache->key.offset, factor);
+               if ((full_search || !cache->ro) &&
+                   block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
                        if (used + cache->pinned + cache->reserved <
-                           free_check) {
-                               found_group = cache;
+                           div_factor(cache->key.offset, factor)) {
+                               group_start = cache->key.objectid;
                                spin_unlock(&cache->lock);
+                               put_block_group(cache);
                                goto found;
                        }
                }
                spin_unlock(&cache->lock);
+               put_block_group(cache);
                cond_resched();
        }
        if (!wrapped) {
@@ -423,18 +391,7 @@ again:
                goto again;
        }
 found:
-       return found_group;
-}
-
-struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
-                                                struct btrfs_block_group_cache
-                                                *hint, u64 search_start,
-                                                int data, int owner)
-{
-
-       struct btrfs_block_group_cache *ret;
-       ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
-       return ret;
+       return group_start;
 }
 
 /* simple helper to search for an existing extent at a given offset */
@@ -1809,6 +1766,19 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
        return werr;
 }
 
+int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
+{
+       struct btrfs_block_group_cache *block_group;
+       int readonly = 0;
+
+       block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
+       if (!block_group || block_group->ro)
+               readonly = 1;
+       if (block_group)
+               put_block_group(block_group);
+       return readonly;
+}
+
 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                             u64 total_bytes, u64 bytes_used,
                             struct btrfs_space_info **space_info)
@@ -1995,10 +1965,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                                int ret;
                                ret = btrfs_add_free_space(cache, bytenr,
                                                           num_bytes);
-                               if (ret)
-                                       return -1;
+                               WARN_ON(ret);
                        }
                }
+               put_block_group(cache);
                total -= num_bytes;
                bytenr += num_bytes;
        }
@@ -2008,12 +1978,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 {
        struct btrfs_block_group_cache *cache;
+       u64 bytenr;
 
        cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
        if (!cache)
                return 0;
 
-       return cache->key.objectid;
+       bytenr = cache->key.objectid;
+       put_block_group(cache);
+
+       return bytenr;
 }
 
 int btrfs_update_pinned_extents(struct btrfs_root *root,
@@ -2055,6 +2029,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
                        if (cache->cached)
                                btrfs_add_free_space(cache, bytenr, len);
                }
+               put_block_group(cache);
                bytenr += len;
                num -= len;
        }
@@ -2085,6 +2060,7 @@ static int update_reserved_extents(struct btrfs_root *root,
                }
                spin_unlock(&cache->lock);
                spin_unlock(&cache->space_info->lock);
+               put_block_group(cache);
                bytenr += len;
                num -= len;
        }
@@ -2724,6 +2700,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        cache = btrfs_lookup_block_group(root->fs_info, bytenr);
                        BUG_ON(!cache);
                        btrfs_add_free_space(cache, bytenr, num_bytes);
+                       put_block_group(cache);
                        update_reserved_extents(root, bytenr, num_bytes, 0);
                        return 0;
                }
@@ -2928,6 +2905,8 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
                }
 new_group:
                mutex_unlock(&block_group->alloc_mutex);
+               put_block_group(block_group);
+               block_group = NULL;
 new_group_no_lock:
                /* don't try to compare new allocations against the
                 * last allocation any more
@@ -2997,6 +2976,8 @@ loop_check:
 
                block_group = list_entry(cur, struct btrfs_block_group_cache,
                                         list);
+               atomic_inc(&block_group->count);
+
                search_start = block_group->key.objectid;
                cur = cur->next;
        }
@@ -3004,7 +2985,7 @@ loop_check:
        /* we found what we needed */
        if (ins->objectid) {
                if (!(data & BTRFS_BLOCK_GROUP_DATA))
-                       trans->block_group = block_group;
+                       trans->block_group = block_group->key.objectid;
 
                if (last_ptr)
                        *last_ptr = ins->objectid + ins->offset;
@@ -3015,6 +2996,8 @@ loop_check:
                       loop, allowed_chunk_alloc);
                ret = -ENOSPC;
        }
+       if (block_group)
+               put_block_group(block_group);
 
        up_read(&space_info->groups_sem);
        return ret;
@@ -3124,6 +3107,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
                return -ENOSPC;
        }
        btrfs_add_free_space(cache, start, len);
+       put_block_group(cache);
        update_reserved_extents(root, start, len, 0);
        return 0;
 }
@@ -3288,6 +3272,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
        ret = btrfs_remove_free_space(block_group, ins->objectid,
                                      ins->offset);
        BUG_ON(ret);
+       put_block_group(block_group);
        ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
                                            ref_generation, owner, ins);
        return ret;
@@ -5703,6 +5688,7 @@ next:
        WARN_ON(block_group->reserved > 0);
        WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
        spin_unlock(&block_group->lock);
+       put_block_group(block_group);
        ret = 0;
 out:
        btrfs_free_path(path);
@@ -5763,6 +5749,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                down_write(&block_group->space_info->groups_sem);
                list_del(&block_group->list);
                up_write(&block_group->space_info->groups_sem);
+
+               WARN_ON(atomic_read(&block_group->count) != 1);
                kfree(block_group);
 
                spin_lock(&info->block_group_cache_lock);
@@ -5807,6 +5795,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                        break;
                }
 
+               atomic_set(&cache->count, 1);
                spin_lock_init(&cache->lock);
                mutex_init(&cache->alloc_mutex);
                mutex_init(&cache->cache_mutex);
@@ -5861,11 +5850,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
        cache->key.objectid = chunk_offset;
        cache->key.offset = size;
+       cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+       atomic_set(&cache->count, 1);
        spin_lock_init(&cache->lock);
        mutex_init(&cache->alloc_mutex);
        mutex_init(&cache->cache_mutex);
        INIT_LIST_HEAD(&cache->list);
-       btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
 
        btrfs_set_block_group_used(&cache->item, bytes_used);
        btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
@@ -5926,10 +5916,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        spin_unlock(&block_group->space_info->lock);
        block_group->space_info->full = 0;
 
-       /*
-       memset(shrink_block_group, 0, sizeof(*shrink_block_group));
-       kfree(shrink_block_group);
-       */
+       put_block_group(block_group);
+       put_block_group(block_group);
 
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret > 0)
index 932d8c0..0a28b77 100644 (file)
@@ -989,7 +989,6 @@ next_slot:
 
                if (extent_type == BTRFS_FILE_EXTENT_REG ||
                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
-                       struct btrfs_block_group_cache *block_group;
                        disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                        extent_end = found_key.offset +
                                btrfs_file_extent_num_bytes(leaf, fi);
@@ -1007,9 +1006,7 @@ next_slot:
                                goto out_check;
                        if (btrfs_cross_ref_exist(trans, root, disk_bytenr))
                                goto out_check;
-                       block_group = btrfs_lookup_block_group(root->fs_info,
-                                                              disk_bytenr);
-                       if (!block_group || block_group->ro)
+                       if (btrfs_extent_readonly(root, disk_bytenr))
                                goto out_check;
                        disk_bytenr += btrfs_file_extent_offset(leaf, fi);
                        nocow = 1;
@@ -1969,16 +1966,11 @@ void btrfs_read_locked_inode(struct inode *inode)
        rdev = btrfs_inode_rdev(leaf, inode_item);
 
        BTRFS_I(inode)->index_cnt = (u64)-1;
+       BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
 
        alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
-       BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
-                                                      alloc_group_block);
-       BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-       if (!BTRFS_I(inode)->block_group) {
-               BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
-                                                NULL, 0,
-                                                BTRFS_BLOCK_GROUP_METADATA, 0);
-       }
+       BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
+                                               alloc_group_block, 0);
        btrfs_free_path(path);
        inode_item = NULL;
 
@@ -2048,8 +2040,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_inode_transid(leaf, item, trans->transid);
        btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
        btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
-       btrfs_set_inode_block_group(leaf, item,
-                                   BTRFS_I(inode)->block_group->key.objectid);
+       btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
 }
 
 /*
@@ -3358,14 +3349,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct inode *dir,
                                     const char *name, int name_len,
-                                    u64 ref_objectid,
-                                    u64 objectid,
-                                    struct btrfs_block_group_cache *group,
-                                    int mode, u64 *index)
+                                    u64 ref_objectid, u64 objectid,
+                                    u64 alloc_hint, int mode, u64 *index)
 {
        struct inode *inode;
        struct btrfs_inode_item *inode_item;
-       struct btrfs_block_group_cache *new_inode_group;
        struct btrfs_key *location;
        struct btrfs_path *path;
        struct btrfs_inode_ref *ref;
@@ -3401,13 +3389,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                owner = 0;
        else
                owner = 1;
-       new_inode_group = btrfs_find_block_group(root, group, 0,
-                                      BTRFS_BLOCK_GROUP_METADATA, owner);
-       if (!new_inode_group) {
-               printk("find_block group failed\n");
-               new_inode_group = group;
-       }
-       BTRFS_I(inode)->block_group = new_inode_group;
+       BTRFS_I(inode)->block_group =
+                       btrfs_find_block_group(root, 0, alloc_hint, owner);
 
        key[0].objectid = objectid;
        btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -4366,16 +4349,16 @@ out:
 /*
  * create a new subvolume directory/inode (helper for the ioctl).
  */
-int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry,
-               struct btrfs_trans_handle *trans, u64 new_dirid,
-               struct btrfs_block_group_cache *block_group)
+int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *new_root, struct dentry *dentry,
+                            u64 new_dirid, u64 alloc_hint)
 {
        struct inode *inode;
        int error;
        u64 index = 0;
 
        inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
-                               new_dirid, block_group, S_IFDIR | 0700, &index);
+                               new_dirid, alloc_hint, S_IFDIR | 0700, &index);
        if (IS_ERR(inode))
                return PTR_ERR(inode);
        inode->i_op = &btrfs_dir_inode_operations;
index 69c4a07..5d67858 100644 (file)
@@ -173,7 +173,7 @@ static noinline int create_subvol(struct btrfs_root *root,
        trans = btrfs_start_transaction(new_root, 1);
        BUG_ON(!trans);
 
-       ret = btrfs_create_subvol_root(new_root, dentry, trans, new_dirid,
+       ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid,
                                       BTRFS_I(dir)->block_group);
        if (ret)
                goto fail;
index 47cd5fc..4604178 100644 (file)
@@ -182,7 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
        h->transaction = root->fs_info->running_transaction;
        h->blocks_reserved = num_blocks;
        h->blocks_used = 0;
-       h->block_group = NULL;
+       h->block_group = 0;
        h->alloc_exclude_nr = 0;
        h->alloc_exclude_start = 0;
        root->fs_info->running_transaction->use_count++;
index 202c8be..ffe7f63 100644 (file)
@@ -41,7 +41,7 @@ struct btrfs_trans_handle {
        unsigned long blocks_reserved;
        unsigned long blocks_used;
        struct btrfs_transaction *transaction;
-       struct btrfs_block_group_cache *block_group;
+       u64 block_group;
        u64 alloc_exclude_start;
        u64 alloc_exclude_nr;
 };