Btrfs: Don't loop forever on metadata IO failures
Chris Mason [Thu, 14 May 2009 17:24:30 +0000 (13:24 -0400)]
When a btrfs metadata read fails, the first thing we try to do is find
a good copy on another mirror of the block.  If this fails, read_tree_block()
ends up returning a buffer that isn't up to date.

The btrfs btree reading code was reworked to drop locks and repeat
the search whenever IO had to be done, but the changes didn't add a check
for failed reads.  The end result was that the search looped forever on
buffers that were never going to become up to date.

Signed-off-by: Chris Mason <chris.mason@oracle.com>

fs/btrfs/ctree.c

index a99f1c2..fedf8b9 100644 (file)
@@ -1469,6 +1469,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        u32 blocksize;
        struct extent_buffer *b = *eb_ret;
        struct extent_buffer *tmp;
+       int ret;
 
        blocknr = btrfs_node_blockptr(b, slot);
        gen = btrfs_node_ptr_generation(b, slot);
@@ -1476,6 +1477,10 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 
        tmp = btrfs_find_tree_block(root, blocknr, blocksize);
        if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+               /*
+                * we found an up to date block without sleeping, return
+                * right away
+                */
                *eb_ret = tmp;
                return 0;
        }
@@ -1483,7 +1488,9 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        /*
         * reduce lock contention at high levels
         * of the btree by dropping locks before
-        * we read.
+        * we read.  Don't release the lock on the current
+        * level because we need to walk this node to figure
+        * out which blocks to read.
         */
        btrfs_unlock_up_safe(p, level + 1);
        btrfs_set_path_blocking(p);
@@ -1494,10 +1501,21 @@ read_block_for_search(struct btrfs_trans_handle *trans,
                reada_for_search(root, p, level, slot, key->objectid);
 
        btrfs_release_path(NULL, p);
+
+       ret = -EAGAIN;
        tmp = read_tree_block(root, blocknr, blocksize, gen);
-       if (tmp)
+       if (tmp) {
+               /*
+                * If the read above didn't mark this buffer up to date,
+                * it will never end up being up to date.  Set ret to EIO now
+                * and give up so that our caller doesn't loop forever
+                * on our EAGAINs.
+                */
+               if (!btrfs_buffer_uptodate(tmp, 0))
+                       ret = -EIO;
                free_extent_buffer(tmp);
-       return -EAGAIN;
+       }
+       return ret;
 }
 
 /*
@@ -1696,6 +1714,9 @@ cow_done:
                        if (ret == -EAGAIN)
                                goto again;
 
+                       if (ret == -EIO)
+                               goto done;
+
                        if (!p->skip_locking) {
                                int lret;
 
@@ -1738,6 +1759,8 @@ done:
         */
        if (!p->leave_spinning)
                btrfs_set_path_blocking(p);
+       if (ret < 0)
+               btrfs_release_path(root, p);
        return ret;
 }
 
@@ -4212,6 +4235,11 @@ again:
                if (ret == -EAGAIN)
                        goto again;
 
+               if (ret < 0) {
+                       btrfs_release_path(root, path);
+                       goto done;
+               }
+
                if (!path->skip_locking) {
                        ret = btrfs_try_spin_lock(next);
                        if (!ret) {
@@ -4246,6 +4274,11 @@ again:
                if (ret == -EAGAIN)
                        goto again;
 
+               if (ret < 0) {
+                       btrfs_release_path(root, path);
+                       goto done;
+               }
+
                if (!path->skip_locking) {
                        btrfs_assert_tree_locked(path->nodes[level]);
                        ret = btrfs_try_spin_lock(next);