Merge branch 'btrfs-3.0' into for-linus
[linux-2.6.git] / fs / btrfs / volumes.c
index f777145..f2a4cc7 100644 (file)
@@ -38,9 +38,6 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_device *device);
 static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
 
-#define map_lookup_size(n) (sizeof(struct map_lookup) + \
-                           (sizeof(struct btrfs_bio_stripe) * (n)))
-
 static DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
 
@@ -145,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
        unsigned long limit;
        unsigned long last_waited = 0;
        int force_reg = 0;
+       int sync_pending = 0;
        struct blk_plug plug;
 
        /*
@@ -232,6 +230,22 @@ loop_lock:
 
                BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
+               /*
+                * if we're doing the sync list, record that our
+                * plug has some sync requests on it
+                *
+                * If we're doing the regular list and there are
+                * sync requests sitting around, unplug before
+                * we add more
+                */
+               if (pending_bios == &device->pending_sync_bios) {
+                       sync_pending = 1;
+               } else if (sync_pending) {
+                       blk_finish_plug(&plug);
+                       blk_start_plug(&plug);
+                       sync_pending = 0;
+               }
+
                submit_bio(cur->bi_rw, cur);
                num_run++;
                batch_run++;
@@ -353,7 +367,7 @@ static noinline int device_list_add(const char *path,
                INIT_LIST_HEAD(&device->dev_alloc_list);
 
                mutex_lock(&fs_devices->device_list_mutex);
-               list_add(&device->dev_list, &fs_devices->devices);
+               list_add_rcu(&device->dev_list, &fs_devices->devices);
                mutex_unlock(&fs_devices->device_list_mutex);
 
                device->fs_devices = fs_devices;
@@ -396,7 +410,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
        fs_devices->latest_trans = orig->latest_trans;
        memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
 
-       mutex_lock(&orig->device_list_mutex);
+       /* We have held the volume lock, it is safe to get the devices. */
        list_for_each_entry(orig_dev, &orig->devices, dev_list) {
                device = kzalloc(sizeof(*device), GFP_NOFS);
                if (!device)
@@ -419,10 +433,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
        }
-       mutex_unlock(&orig->device_list_mutex);
        return fs_devices;
 error:
-       mutex_unlock(&orig->device_list_mutex);
        free_fs_devices(fs_devices);
        return ERR_PTR(-ENOMEM);
 }
@@ -433,7 +445,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
 
        mutex_lock(&uuid_mutex);
 again:
-       mutex_lock(&fs_devices->device_list_mutex);
+       /* This is the initialized path, it is safe to release the devices. */
        list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
                if (device->in_fs_metadata)
                        continue;
@@ -453,7 +465,6 @@ again:
                kfree(device->name);
                kfree(device);
        }
-       mutex_unlock(&fs_devices->device_list_mutex);
 
        if (fs_devices->seed) {
                fs_devices = fs_devices->seed;
@@ -464,6 +475,29 @@ again:
        return 0;
 }
 
+static void __free_device(struct work_struct *work)
+{
+       struct btrfs_device *device;
+
+       device = container_of(work, struct btrfs_device, rcu_work);
+
+       if (device->bdev)
+               blkdev_put(device->bdev, device->mode);
+
+       kfree(device->name);
+       kfree(device);
+}
+
+static void free_device(struct rcu_head *head)
+{
+       struct btrfs_device *device;
+
+       device = container_of(head, struct btrfs_device, rcu);
+
+       INIT_WORK(&device->rcu_work, __free_device);
+       schedule_work(&device->rcu_work);
+}
+
 static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 {
        struct btrfs_device *device;
@@ -471,20 +505,36 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
        if (--fs_devices->opened > 0)
                return 0;
 
+       mutex_lock(&fs_devices->device_list_mutex);
        list_for_each_entry(device, &fs_devices->devices, dev_list) {
-               if (device->bdev) {
-                       blkdev_put(device->bdev, device->mode);
+               struct btrfs_device *new_device;
+
+               if (device->bdev)
                        fs_devices->open_devices--;
-               }
+
                if (device->writeable) {
                        list_del_init(&device->dev_alloc_list);
                        fs_devices->rw_devices--;
                }
 
-               device->bdev = NULL;
-               device->writeable = 0;
-               device->in_fs_metadata = 0;
+               if (device->can_discard)
+                       fs_devices->num_can_discard--;
+
+               new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
+               BUG_ON(!new_device);
+               memcpy(new_device, device, sizeof(*new_device));
+               new_device->name = kstrdup(device->name, GFP_NOFS);
+               BUG_ON(device->name && !new_device->name);
+               new_device->bdev = NULL;
+               new_device->writeable = 0;
+               new_device->in_fs_metadata = 0;
+               new_device->can_discard = 0;
+               list_replace_rcu(&device->dev_list, &new_device->dev_list);
+
+               call_rcu(&device->rcu, free_device);
        }
+       mutex_unlock(&fs_devices->device_list_mutex);
+
        WARN_ON(fs_devices->open_devices);
        WARN_ON(fs_devices->rw_devices);
        fs_devices->opened = 0;
@@ -518,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                                fmode_t flags, void *holder)
 {
+       struct request_queue *q;
        struct block_device *bdev;
        struct list_head *head = &fs_devices->devices;
        struct btrfs_device *device;
@@ -574,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                        seeding = 0;
                }
 
+               q = bdev_get_queue(bdev);
+               if (blk_queue_discard(q)) {
+                       device->can_discard = 1;
+                       fs_devices->num_can_discard++;
+               }
+
                device->bdev = bdev;
                device->in_fs_metadata = 0;
                device->mode = flags;
@@ -587,6 +644,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                        list_add(&device->dev_alloc_list,
                                 &fs_devices->alloc_list);
                }
+               brelse(bh);
                continue;
 
 error_brelse:
@@ -659,12 +717,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
        transid = btrfs_super_generation(disk_super);
        if (disk_super->label[0])
                printk(KERN_INFO "device label %s ", disk_super->label);
-       else {
-               /* FIXME, make a readl uuid parser */
-               printk(KERN_INFO "device fsid %llx-%llx ",
-                      *(unsigned long long *)disk_super->fsid,
-                      *(unsigned long long *)(disk_super->fsid + 8));
-       }
+       else
+               printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
        printk(KERN_CONT "devid %llu transid %llu %s\n",
               (unsigned long long)devid, (unsigned long long)transid, path);
        ret = device_list_add(path, disk_super, devid, fs_devices_ret);
@@ -809,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
 
        max_hole_start = search_start;
        max_hole_size = 0;
+       hole_size = 0;
 
        if (search_start >= search_end) {
                ret = -ENOSPC;
@@ -891,7 +946,14 @@ next:
                cond_resched();
        }
 
-       hole_size = search_end- search_start;
+       /*
+        * At this point, search_start should be the end of
+        * allocated dev extents, and when shrinking the device,
+        * search_end may be smaller than search_start.
+        */
+       if (search_end > search_start)
+               hole_size = search_end - search_start;
+
        if (hole_size > max_hole_size) {
                max_hole_start = search_start;
                max_hole_size = hole_size;
@@ -936,14 +998,14 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
        if (ret > 0) {
                ret = btrfs_previous_item(root, path, key.objectid,
                                          BTRFS_DEV_EXTENT_KEY);
-               BUG_ON(ret);
+               if (ret)
+                       goto out;
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                extent = btrfs_item_ptr(leaf, path->slots[0],
                                        struct btrfs_dev_extent);
                BUG_ON(found_key.offset > start || found_key.offset +
                       btrfs_dev_extent_length(leaf, extent) < start);
-               ret = 0;
        } else if (ret == 0) {
                leaf = path->nodes[0];
                extent = btrfs_item_ptr(leaf, path->slots[0],
@@ -954,8 +1016,8 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
        if (device->bytes_used > 0)
                device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
        ret = btrfs_del_item(trans, root, path);
-       BUG_ON(ret);
 
+out:
        btrfs_free_path(path);
        return ret;
 }
@@ -1011,7 +1073,8 @@ static noinline int find_next_chunk(struct btrfs_root *root,
        struct btrfs_key found_key;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
 
        key.objectid = objectid;
        key.offset = (u64)-1;
@@ -1190,11 +1253,13 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        struct block_device *bdev;
        struct buffer_head *bh = NULL;
        struct btrfs_super_block *disk_super;
+       struct btrfs_fs_devices *cur_devices;
        u64 all_avail;
        u64 devid;
        u64 num_devices;
        u8 *dev_uuid;
        int ret = 0;
+       bool clear_super = false;
 
        mutex_lock(&uuid_mutex);
        mutex_lock(&root->fs_info->volume_mutex);
@@ -1225,14 +1290,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
                device = NULL;
                devices = &root->fs_info->fs_devices->devices;
-               mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+               /*
+                * It is safe to read the devices since the volume_mutex
+                * is held.
+                */
                list_for_each_entry(tmp, devices, dev_list) {
                        if (tmp->in_fs_metadata && !tmp->bdev) {
                                device = tmp;
                                break;
                        }
                }
-               mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
                bdev = NULL;
                bh = NULL;
                disk_super = NULL;
@@ -1274,8 +1341,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        }
 
        if (device->writeable) {
+               lock_chunks(root);
                list_del_init(&device->dev_alloc_list);
+               unlock_chunks(root);
                root->fs_info->fs_devices->rw_devices--;
+               clear_super = true;
        }
 
        ret = btrfs_shrink_device(device, 0);
@@ -1287,15 +1357,17 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
                goto error_undo;
 
        device->in_fs_metadata = 0;
+       btrfs_scrub_cancel_dev(root, device);
 
        /*
         * the device list mutex makes sure that we don't change
         * the device list while someone else is writing out all
         * the device supers.
         */
+
+       cur_devices = device->fs_devices;
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
-       list_del_init(&device->dev_list);
-       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+       list_del_rcu(&device->dev_list);
 
        device->fs_devices->num_devices--;
 
@@ -1309,34 +1381,36 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        if (device->bdev == root->fs_info->fs_devices->latest_bdev)
                root->fs_info->fs_devices->latest_bdev = next_device->bdev;
 
-       if (device->bdev) {
-               blkdev_put(device->bdev, device->mode);
-               device->bdev = NULL;
+       if (device->bdev)
                device->fs_devices->open_devices--;
-       }
+
+       call_rcu(&device->rcu, free_device);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
        num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
        btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);
 
-       if (device->fs_devices->open_devices == 0) {
+       if (cur_devices->open_devices == 0) {
                struct btrfs_fs_devices *fs_devices;
                fs_devices = root->fs_info->fs_devices;
                while (fs_devices) {
-                       if (fs_devices->seed == device->fs_devices)
+                       if (fs_devices->seed == cur_devices)
                                break;
                        fs_devices = fs_devices->seed;
                }
-               fs_devices->seed = device->fs_devices->seed;
-               device->fs_devices->seed = NULL;
-               __btrfs_close_devices(device->fs_devices);
-               free_fs_devices(device->fs_devices);
+               fs_devices->seed = cur_devices->seed;
+               cur_devices->seed = NULL;
+               lock_chunks(root);
+               __btrfs_close_devices(cur_devices);
+               unlock_chunks(root);
+               free_fs_devices(cur_devices);
        }
 
        /*
         * at this point, the device is zero sized.  We want to
         * remove it from the devices list and zero out the old super
         */
-       if (device->writeable) {
+       if (clear_super) {
                /* make sure this device isn't detected as part of
                 * the FS anymore
                 */
@@ -1345,8 +1419,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
                sync_dirty_buffer(bh);
        }
 
-       kfree(device->name);
-       kfree(device);
        ret = 0;
 
 error_brelse:
@@ -1360,8 +1432,10 @@ out:
        return ret;
 error_undo:
        if (device->writeable) {
+               lock_chunks(root);
                list_add(&device->dev_alloc_list,
                         &root->fs_info->fs_devices->alloc_list);
+               unlock_chunks(root);
                root->fs_info->fs_devices->rw_devices++;
        }
        goto error_brelse;
@@ -1401,7 +1475,12 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
        INIT_LIST_HEAD(&seed_devices->devices);
        INIT_LIST_HEAD(&seed_devices->alloc_list);
        mutex_init(&seed_devices->device_list_mutex);
-       list_splice_init(&fs_devices->devices, &seed_devices->devices);
+
+       mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+       list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
+                             synchronize_rcu);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
        list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
        list_for_each_entry(device, &seed_devices->devices, dev_list) {
                device->fs_devices = seed_devices;
@@ -1500,6 +1579,7 @@ error:
 
 int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 {
+       struct request_queue *q;
        struct btrfs_trans_handle *trans;
        struct btrfs_device *device;
        struct block_device *bdev;
@@ -1569,6 +1649,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 
        lock_chunks(root);
 
+       q = bdev_get_queue(bdev);
+       if (blk_queue_discard(q))
+               device->can_discard = 1;
        device->writeable = 1;
        device->work.func = pending_bios_fn;
        generate_random_uuid(device->uuid);
@@ -1598,12 +1681,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
         * half setup
         */
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
-       list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
+       list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
        list_add(&device->dev_alloc_list,
                 &root->fs_info->fs_devices->alloc_list);
        root->fs_info->fs_devices->num_devices++;
        root->fs_info->fs_devices->open_devices++;
        root->fs_info->fs_devices->rw_devices++;
+       if (device->can_discard)
+               root->fs_info->fs_devices->num_can_discard++;
        root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
 
        if (!blk_queue_nonrot(bdev_get_queue(bdev)))
@@ -1756,10 +1841,9 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
        BUG_ON(ret);
 
        ret = btrfs_del_item(trans, root, path);
-       BUG_ON(ret);
 
        btrfs_free_path(path);
-       return 0;
+       return ret;
 }
 
 static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
@@ -2020,8 +2104,10 @@ int btrfs_balance(struct btrfs_root *dev_root)
 
        /* step two, relocate all the chunks */
        path = btrfs_alloc_path();
-       BUG_ON(!path);
-
+       if (!path) {
+               ret = -ENOMEM;
+               goto error;
+       }
        key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
        key.offset = (u64)-1;
        key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -2057,7 +2143,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
                                           chunk_root->root_key.objectid,
                                           found_key.objectid,
                                           found_key.offset);
-               BUG_ON(ret && ret != -ENOSPC);
+               if (ret && ret != -ENOSPC)
+                       goto error;
                key.offset = found_key.offset - 1;
        }
        ret = 0;
@@ -2368,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                        total_avail = device->total_bytes - device->bytes_used;
                else
                        total_avail = 0;
-               /* avail is off by max(alloc_start, 1MB), but that is the same
-                * for all devices, so it doesn't hurt the sorting later on
-                */
+
+               /* If there is no space on this device, skip it. */
+               if (total_avail == 0)
+                       continue;
 
                ret = find_free_dev_extent(trans, device,
                                           max_stripe_size * dev_stripes,
@@ -2619,7 +2707,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 
        ret = find_next_chunk(fs_info->chunk_root,
                              BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
-       BUG_ON(ret);
+       if (ret)
+               return ret;
 
        alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
                        (fs_info->metadata_alloc_profile &
@@ -3553,7 +3642,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
        if (!sb)
                return -ENOMEM;
        btrfs_set_buffer_uptodate(sb);
-       btrfs_set_buffer_lockdep_class(sb, 0);
+       btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
 
        write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
        array_size = btrfs_super_sys_array_size(super_copy);