Btrfs: cleanup scrub bio and worker wait code
[linux-3.10.git] / fs / btrfs / extent-tree.c
index a3a902f..f8a358a 100644 (file)
@@ -33,6 +33,7 @@
 #include "volumes.h"
 #include "locking.h"
 #include "free-space-cache.h"
+#include "math.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
@@ -312,7 +313,8 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
        while (start < end) {
                ret = find_first_extent_bit(info->pinned_extents, start,
                                            &extent_start, &extent_end,
-                                           EXTENT_DIRTY | EXTENT_UPTODATE);
+                                           EXTENT_DIRTY | EXTENT_UPTODATE,
+                                           NULL);
                if (ret)
                        break;
 
@@ -648,24 +650,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
        rcu_read_unlock();
 }
 
-static u64 div_factor(u64 num, int factor)
-{
-       if (factor == 10)
-               return num;
-       num *= factor;
-       do_div(num, 10);
-       return num;
-}
-
-static u64 div_factor_fine(u64 num, int factor)
-{
-       if (factor == 100)
-               return num;
-       num *= factor;
-       do_div(num, 100);
-       return num;
-}
-
 u64 btrfs_find_block_group(struct btrfs_root *root,
                           u64 search_start, u64 search_hint, int owner)
 {
@@ -2313,6 +2297,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                                kfree(extent_op);
 
                                if (ret) {
+                                       list_del_init(&locked_ref->cluster);
+                                       mutex_unlock(&locked_ref->mutex);
+
                                        printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
                                        spin_lock(&delayed_refs->lock);
                                        return ret;
@@ -2355,6 +2342,10 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                count++;
 
                if (ret) {
+                       if (locked_ref) {
+                               list_del_init(&locked_ref->cluster);
+                               mutex_unlock(&locked_ref->mutex);
+                       }
                        printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
                        spin_lock(&delayed_refs->lock);
                        return ret;
@@ -3660,7 +3651,7 @@ out:
 
 static int can_overcommit(struct btrfs_root *root,
                          struct btrfs_space_info *space_info, u64 bytes,
-                         int flush)
+                         enum btrfs_reserve_flush_enum flush)
 {
        u64 profile = btrfs_get_alloc_profile(root, 0);
        u64 avail;
@@ -3684,11 +3675,11 @@ static int can_overcommit(struct btrfs_root *root,
                avail >>= 1;
 
        /*
-        * If we aren't flushing don't let us overcommit too much, say
-        * 1/8th of the space.  If we can flush, let it overcommit up to
-        * 1/2 of the space.
+        * If we aren't flushing all things, let us overcommit up to
+        * 1/2th of the space. If we can flush, don't let us overcommit
+        * too much, let it overcommit up to 1/8 of the space.
         */
-       if (flush)
+       if (flush == BTRFS_RESERVE_FLUSH_ALL)
                avail >>= 3;
        else
                avail >>= 1;
@@ -3712,6 +3703,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
        long time_left;
        unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
        int loops = 0;
+       enum btrfs_reserve_flush_enum flush;
 
        trans = (struct btrfs_trans_handle *)current->journal_info;
        block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -3739,8 +3731,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
                wait_event(root->fs_info->async_submit_wait,
                           !atomic_read(&root->fs_info->async_delalloc_pages));
 
+               if (!trans)
+                       flush = BTRFS_RESERVE_FLUSH_ALL;
+               else
+                       flush = BTRFS_RESERVE_NO_FLUSH;
                spin_lock(&space_info->lock);
-               if (can_overcommit(root, space_info, orig, !trans)) {
+               if (can_overcommit(root, space_info, orig, flush)) {
                        spin_unlock(&space_info->lock);
                        break;
                }
@@ -3817,10 +3813,10 @@ commit:
 }
 
 enum flush_state {
-       FLUSH_DELALLOC          =       1,
-       FLUSH_DELALLOC_WAIT     =       2,
-       FLUSH_DELAYED_ITEMS_NR  =       3,
-       FLUSH_DELAYED_ITEMS     =       4,
+       FLUSH_DELAYED_ITEMS_NR  =       1,
+       FLUSH_DELAYED_ITEMS     =       2,
+       FLUSH_DELALLOC          =       3,
+       FLUSH_DELALLOC_WAIT     =       4,
        ALLOC_CHUNK             =       5,
        COMMIT_TRANS            =       6,
 };
@@ -3834,11 +3830,6 @@ static int flush_space(struct btrfs_root *root,
        int ret = 0;
 
        switch (state) {
-       case FLUSH_DELALLOC:
-       case FLUSH_DELALLOC_WAIT:
-               shrink_delalloc(root, num_bytes, orig_bytes,
-                               state == FLUSH_DELALLOC_WAIT);
-               break;
        case FLUSH_DELAYED_ITEMS_NR:
        case FLUSH_DELAYED_ITEMS:
                if (state == FLUSH_DELAYED_ITEMS_NR) {
@@ -3859,6 +3850,11 @@ static int flush_space(struct btrfs_root *root,
                ret = btrfs_run_delayed_items_nr(trans, root, nr);
                btrfs_end_transaction(trans, root);
                break;
+       case FLUSH_DELALLOC:
+       case FLUSH_DELALLOC_WAIT:
+               shrink_delalloc(root, num_bytes, orig_bytes,
+                               state == FLUSH_DELALLOC_WAIT);
+               break;
        case ALLOC_CHUNK:
                trans = btrfs_join_transaction(root);
                if (IS_ERR(trans)) {
@@ -3898,24 +3894,25 @@ static int flush_space(struct btrfs_root *root,
  */
 static int reserve_metadata_bytes(struct btrfs_root *root,
                                  struct btrfs_block_rsv *block_rsv,
-                                 u64 orig_bytes, int flush)
+                                 u64 orig_bytes,
+                                 enum btrfs_reserve_flush_enum flush)
 {
        struct btrfs_space_info *space_info = block_rsv->space_info;
        u64 used;
        u64 num_bytes = orig_bytes;
-       int flush_state = FLUSH_DELALLOC;
+       int flush_state = FLUSH_DELAYED_ITEMS_NR;
        int ret = 0;
        bool flushing = false;
-       bool committed = false;
 
 again:
        ret = 0;
        spin_lock(&space_info->lock);
        /*
-        * We only want to wait if somebody other than us is flushing and we are
-        * actually alloed to flush.
+        * We only want to wait if somebody other than us is flushing and we
+        * are actually allowed to flush all things.
         */
-       while (flush && !flushing && space_info->flush) {
+       while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
+              space_info->flush) {
                spin_unlock(&space_info->lock);
                /*
                 * If we have a trans handle we can't wait because the flusher
@@ -3969,56 +3966,52 @@ again:
                        (orig_bytes * 2);
        }
 
-       if (ret) {
-               u64 avail;
-
-               /*
-                * If we have a lot of space that's pinned, don't bother doing
-                * the overcommit dance yet and just commit the transaction.
-                */
-               avail = (space_info->total_bytes - space_info->bytes_used) * 8;
-               do_div(avail, 10);
-               if (space_info->bytes_pinned >= avail && flush && !committed) {
-                       space_info->flush = 1;
-                       flushing = true;
-                       spin_unlock(&space_info->lock);
-                       ret = may_commit_transaction(root, space_info,
-                                                    orig_bytes, 1);
-                       if (ret)
-                               goto out;
-                       committed = true;
-                       goto again;
-               }
-
-               if (can_overcommit(root, space_info, orig_bytes, flush)) {
-                       space_info->bytes_may_use += orig_bytes;
-                       trace_btrfs_space_reservation(root->fs_info,
-                               "space_info", space_info->flags, orig_bytes, 1);
-                       ret = 0;
-               }
+       if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
+               space_info->bytes_may_use += orig_bytes;
+               trace_btrfs_space_reservation(root->fs_info, "space_info",
+                                             space_info->flags, orig_bytes,
+                                             1);
+               ret = 0;
        }
 
        /*
         * Couldn't make our reservation, save our place so while we're trying
         * to reclaim space we can actually use it instead of somebody else
         * stealing it from us.
+        *
+        * We make the other tasks wait for the flush only when we can flush
+        * all things.
         */
-       if (ret && flush) {
+       if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) {
                flushing = true;
                space_info->flush = 1;
        }
 
        spin_unlock(&space_info->lock);
 
-       if (!ret || !flush)
+       if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
                goto out;
 
        ret = flush_space(root, space_info, num_bytes, orig_bytes,
                          flush_state);
        flush_state++;
+
+       /*
+        * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
+        * would happen. So skip delalloc flush.
+        */
+       if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
+           (flush_state == FLUSH_DELALLOC ||
+            flush_state == FLUSH_DELALLOC_WAIT))
+               flush_state = ALLOC_CHUNK;
+
        if (!ret)
                goto again;
-       else if (flush_state <= COMMIT_TRANS)
+       else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
+                flush_state < COMMIT_TRANS)
+               goto again;
+       else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
+                flush_state <= COMMIT_TRANS)
                goto again;
 
 out:
@@ -4169,9 +4162,9 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
        kfree(rsv);
 }
 
-static inline int __block_rsv_add(struct btrfs_root *root,
-                                 struct btrfs_block_rsv *block_rsv,
-                                 u64 num_bytes, int flush)
+int btrfs_block_rsv_add(struct btrfs_root *root,
+                       struct btrfs_block_rsv *block_rsv, u64 num_bytes,
+                       enum btrfs_reserve_flush_enum flush)
 {
        int ret;
 
@@ -4187,20 +4180,6 @@ static inline int __block_rsv_add(struct btrfs_root *root,
        return ret;
 }
 
-int btrfs_block_rsv_add(struct btrfs_root *root,
-                       struct btrfs_block_rsv *block_rsv,
-                       u64 num_bytes)
-{
-       return __block_rsv_add(root, block_rsv, num_bytes, 1);
-}
-
-int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
-                               struct btrfs_block_rsv *block_rsv,
-                               u64 num_bytes)
-{
-       return __block_rsv_add(root, block_rsv, num_bytes, 0);
-}
-
 int btrfs_block_rsv_check(struct btrfs_root *root,
                          struct btrfs_block_rsv *block_rsv, int min_factor)
 {
@@ -4219,9 +4198,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root,
        return ret;
 }
 
-static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
-                                          struct btrfs_block_rsv *block_rsv,
-                                          u64 min_reserved, int flush)
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+                          struct btrfs_block_rsv *block_rsv, u64 min_reserved,
+                          enum btrfs_reserve_flush_enum flush)
 {
        u64 num_bytes = 0;
        int ret = -ENOSPC;
@@ -4249,20 +4228,6 @@ static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
        return ret;
 }
 
-int btrfs_block_rsv_refill(struct btrfs_root *root,
-                          struct btrfs_block_rsv *block_rsv,
-                          u64 min_reserved)
-{
-       return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1);
-}
-
-int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
-                                  struct btrfs_block_rsv *block_rsv,
-                                  u64 min_reserved)
-{
-       return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0);
-}
-
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
                            struct btrfs_block_rsv *dst_rsv,
                            u64 num_bytes)
@@ -4553,14 +4518,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        u64 csum_bytes;
        unsigned nr_extents = 0;
        int extra_reserve = 0;
-       int flush = 1;
+       enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
        int ret;
 
        /* Need to be holding the i_mutex here if we aren't free space cache */
        if (btrfs_is_free_space_inode(inode))
-               flush = 0;
+               flush = BTRFS_RESERVE_NO_FLUSH;
 
-       if (flush && btrfs_transaction_in_commit(root->fs_info))
+       if (flush != BTRFS_RESERVE_NO_FLUSH &&
+           btrfs_transaction_in_commit(root->fs_info))
                schedule_timeout(1);
 
        mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
@@ -4990,9 +4956,13 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_block_group_cache *cache = NULL;
+       struct btrfs_space_info *space_info;
+       struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
        u64 len;
+       bool readonly;
 
        while (start <= end) {
+               readonly = false;
                if (!cache ||
                    start >= cache->key.objectid + cache->key.offset) {
                        if (cache)
@@ -5010,15 +4980,30 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
                }
 
                start += len;
+               space_info = cache->space_info;
 
-               spin_lock(&cache->space_info->lock);
+               spin_lock(&space_info->lock);
                spin_lock(&cache->lock);
                cache->pinned -= len;
-               cache->space_info->bytes_pinned -= len;
-               if (cache->ro)
-                       cache->space_info->bytes_readonly += len;
+               space_info->bytes_pinned -= len;
+               if (cache->ro) {
+                       space_info->bytes_readonly += len;
+                       readonly = true;
+               }
                spin_unlock(&cache->lock);
-               spin_unlock(&cache->space_info->lock);
+               if (!readonly && global_rsv->space_info == space_info) {
+                       spin_lock(&global_rsv->lock);
+                       if (!global_rsv->full) {
+                               len = min(len, global_rsv->size -
+                                         global_rsv->reserved);
+                               global_rsv->reserved += len;
+                               space_info->bytes_may_use += len;
+                               if (global_rsv->reserved >= global_rsv->size)
+                                       global_rsv->full = 1;
+                       }
+                       spin_unlock(&global_rsv->lock);
+               }
+               spin_unlock(&space_info->lock);
        }
 
        if (cache)
@@ -5045,7 +5030,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
        while (1) {
                ret = find_first_extent_bit(unpin, 0, &start, &end,
-                                           EXTENT_DIRTY);
+                                           EXTENT_DIRTY, NULL);
                if (ret)
                        break;
 
@@ -5123,8 +5108,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        ret = remove_extent_backref(trans, extent_root, path,
                                                    NULL, refs_to_drop,
                                                    is_data);
-                       if (ret)
-                               goto abort;
+                       if (ret) {
+                               btrfs_abort_transaction(trans, extent_root, ret);
+                               goto out;
+                       }
                        btrfs_release_path(path);
                        path->leave_spinning = 1;
 
@@ -5142,8 +5129,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                        btrfs_print_leaf(extent_root,
                                                         path->nodes[0]);
                        }
-                       if (ret < 0)
-                               goto abort;
+                       if (ret < 0) {
+                               btrfs_abort_transaction(trans, extent_root, ret);
+                               goto out;
+                       }
                        extent_slot = path->slots[0];
                }
        } else if (ret == -ENOENT) {
@@ -5157,7 +5146,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                       (unsigned long long)owner_objectid,
                       (unsigned long long)owner_offset);
        } else {
-               goto abort;
+               btrfs_abort_transaction(trans, extent_root, ret);
+               goto out;
        }
 
        leaf = path->nodes[0];
@@ -5167,8 +5157,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                BUG_ON(found_extent || extent_slot != path->slots[0]);
                ret = convert_extent_item_v0(trans, extent_root, path,
                                             owner_objectid, 0);
-               if (ret < 0)
-                       goto abort;
+               if (ret < 0) {
+                       btrfs_abort_transaction(trans, extent_root, ret);
+                       goto out;
+               }
 
                btrfs_release_path(path);
                path->leave_spinning = 1;
@@ -5185,8 +5177,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                               (unsigned long long)bytenr);
                        btrfs_print_leaf(extent_root, path->nodes[0]);
                }
-               if (ret < 0)
-                       goto abort;
+               if (ret < 0) {
+                       btrfs_abort_transaction(trans, extent_root, ret);
+                       goto out;
+               }
+
                extent_slot = path->slots[0];
                leaf = path->nodes[0];
                item_size = btrfs_item_size_nr(leaf, extent_slot);
@@ -5223,8 +5218,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        ret = remove_extent_backref(trans, extent_root, path,
                                                    iref, refs_to_drop,
                                                    is_data);
-                       if (ret)
-                               goto abort;
+                       if (ret) {
+                               btrfs_abort_transaction(trans, extent_root, ret);
+                               goto out;
+                       }
                }
        } else {
                if (found_extent) {
@@ -5241,27 +5238,29 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 
                ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
                                      num_to_del);
-               if (ret)
-                       goto abort;
+               if (ret) {
+                       btrfs_abort_transaction(trans, extent_root, ret);
+                       goto out;
+               }
                btrfs_release_path(path);
 
                if (is_data) {
                        ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
-                       if (ret)
-                               goto abort;
+                       if (ret) {
+                               btrfs_abort_transaction(trans, extent_root, ret);
+                               goto out;
+                       }
                }
 
                ret = update_block_group(trans, root, bytenr, num_bytes, 0);
-               if (ret)
-                       goto abort;
+               if (ret) {
+                       btrfs_abort_transaction(trans, extent_root, ret);
+                       goto out;
+               }
        }
 out:
        btrfs_free_path(path);
        return ret;
-
-abort:
-       btrfs_abort_transaction(trans, extent_root, ret);
-       goto out;
 }
 
 /*
@@ -6276,7 +6275,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        block_rsv = get_block_rsv(trans, root);
 
        if (block_rsv->size == 0) {
-               ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
+               ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+                                            BTRFS_RESERVE_NO_FLUSH);
                /*
                 * If we couldn't reserve metadata bytes try and use some from
                 * the global reserve.
@@ -6299,11 +6299,11 @@ use_block_rsv(struct btrfs_trans_handle *trans,
                static DEFINE_RATELIMIT_STATE(_rs,
                                DEFAULT_RATELIMIT_INTERVAL,
                                /*DEFAULT_RATELIMIT_BURST*/ 2);
-               if (__ratelimit(&_rs)) {
-                       printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret);
-                       WARN_ON(1);
-               }
-               ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
+               if (__ratelimit(&_rs))
+                       WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n",
+                            ret);
+               ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+                                            BTRFS_RESERVE_NO_FLUSH);
                if (!ret) {
                        return block_rsv;
                } else if (ret && block_rsv != global_rsv) {