/* fs/btrfs/extent-tree.c */
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "free-space-cache.h"

static int update_reserved_extents(struct btrfs_root *root,
                                   u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc,
                              int mark_free);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
                                u64 root_objectid, u64 owner_objectid,
                                u64 owner_offset, int refs_to_drop,
                                struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
                                    struct extent_buffer *leaf,
                                    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root,
                                      u64 parent, u64 root_objectid,
                                      u64 flags, u64 owner, u64 offset,
                                      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     u64 parent, u64 root_objectid,
                                     u64 flags, struct btrfs_disk_key *key,
                                     int level, struct btrfs_key *ins);

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 alloc_bytes,
                          u64 flags, int force);

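/* return 1 iff the block group's flags include all of the given bits */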
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
        return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
                                struct btrfs_block_group_cache *block_group)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_block_group_cache *cache;

        spin_lock(&info->block_group_cache_lock);
        p = &info->block_group_cache_tree.rb_node;

        while (*p) {
                parent = *p;
                cache = rb_entry(parent, struct btrfs_block_group_cache,
                                 cache_node);
                if (block_group->key.objectid < cache->key.objectid) {
                        p = &(*p)->rb_left;
                } else if (block_group->key.objectid > cache->key.objectid) {
                        p = &(*p)->rb_right;
                } else {
                        spin_unlock(&info->block_group_cache_lock);
                        return -EEXIST;
                }
        }

        rb_link_node(&block_group->cache_node, parent, p);
        rb_insert_color(&block_group->cache_node,
                        &info->block_group_cache_tree);
        spin_unlock(&info->block_group_cache_lock);

        return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0,
 * else it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
                              int contains)
{
        struct btrfs_block_group_cache *cache, *ret = NULL;
        struct rb_node *n;
        u64 end, start;

        spin_lock(&info->block_group_cache_lock);
        n = info->block_group_cache_tree.rb_node;

        while (n) {
                cache = rb_entry(n, struct btrfs_block_group_cache,
                                 cache_node);
                end = cache->key.objectid + cache->key.offset - 1;
                start = cache->key.objectid;

                if (bytenr < start) {
                        if (!contains && (!ret || start < ret->key.objectid))
                                ret = cache;
                        n = n->rb_left;
                } else if (bytenr > start) {
                        if (contains && bytenr <= end) {
                                ret = cache;
                                break;
                        }
                        n = n->rb_right;
                } else {
                        ret = cache;
                        break;
                }
        }
        if (ret)
                atomic_inc(&ret->count);
        spin_unlock(&info->block_group_cache_lock);

        return ret;
}

/*
 * this is only called by cache_block_group.  since we could have freed
 * extents, we need to check the pinned_extents for any extents that can't
 * be used yet, since their free space will be released as soon as the
 * transaction commits.
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
                              struct btrfs_fs_info *info, u64 start, u64 end)
{
        u64 extent_start, extent_end, size;
        int ret;

        while (start < end) {
                ret = find_first_extent_bit(&info->pinned_extents, start,
                                            &extent_start, &extent_end,
                                            EXTENT_DIRTY);
                if (ret)
                        break;

                if (extent_start == start) {
                        start = extent_end + 1;
                } else if (extent_start > start && extent_start < end) {
                        size = extent_start - start;
                        ret = btrfs_add_free_space(block_group, start,
                                                   size);
                        BUG_ON(ret);
                        start = extent_end + 1;
                } else {
                        break;
                }
        }

        if (start < end) {
                size = end - start;
                ret = btrfs_add_free_space(block_group, start, size);
                BUG_ON(ret);
        }

        return 0;
}

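/*
 * carve the superblock mirror locations out of this block group's free
 * space cache so the allocator never hands them out
 */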
static int remove_sb_from_cache(struct btrfs_root *root,
                                struct btrfs_block_group_cache *cache)
{
        u64 bytenr;
        u64 *logical;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                BUG_ON(ret);
                while (nr--) {
                        btrfs_remove_free_space(cache, logical[nr],
                                                stripe_len);
                }
                kfree(logical);
        }
        return 0;
}

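/*
 * build the free space cache for a block group by walking the extent
 * tree and recording every gap between allocated extents as free space
 */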
static int cache_block_group(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group)
{
        struct btrfs_path *path;
        int ret = 0;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        int slot;
        u64 last;

        if (!block_group)
                return 0;

        root = root->fs_info->extent_root;

        if (block_group->cached)
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 2;
        /*
         * we get into deadlocks with paths held by callers of this function.
         * since the alloc_mutex is protecting things right now, just
         * skip the locking here
         */
        path->skip_locking = 1;
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
        key.objectid = last;
        key.offset = 0;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;

        while (1) {
                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto err;
                        if (ret == 0)
                                continue;
                        else
                                break;
                }
                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (key.objectid < block_group->key.objectid)
                        goto next;

                if (key.objectid >= block_group->key.objectid +
                    block_group->key.offset)
                        break;

                if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
                        add_new_free_space(block_group, root->fs_info, last,
                                           key.objectid);

                        last = key.objectid + key.offset;
                }
next:
                path->slots[0]++;
        }

        add_new_free_space(block_group, root->fs_info, last,
                           block_group->key.objectid +
                           block_group->key.offset);

        block_group->cached = 1;
        remove_sb_from_cache(root, block_group);
        ret = 0;
err:
        btrfs_free_path(path);
        return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
        struct btrfs_block_group_cache *cache;

        cache = block_group_cache_tree_search(info, bytenr, 0);

        return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
                                                 struct btrfs_fs_info *info,
                                                 u64 bytenr)
{
        struct btrfs_block_group_cache *cache;

        cache = block_group_cache_tree_search(info, bytenr, 1);

        return cache;
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
        if (atomic_dec_and_test(&cache->count))
                kfree(cache);
}

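/*
 * find the space_info whose flags exactly match the given allocation
 * flags, or NULL if none exists yet
 */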
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
                                                  u64 flags)
{
        struct list_head *head = &info->space_info;
        struct btrfs_space_info *found;

        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
                if (found->flags == flags) {
                        rcu_read_unlock();
                        return found;
                }
        }
        rcu_read_unlock();
        return NULL;
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
        struct list_head *head = &info->space_info;
        struct btrfs_space_info *found;

        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list)
                found->full = 0;
        rcu_read_unlock();
}

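/* scale num by factor tenths, e.g. div_factor(1024, 9) == 921 */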
static u64 div_factor(u64 num, int factor)
{
        if (factor == 10)
                return num;
        num *= factor;
        do_div(num, 10);
        return num;
}

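/*
 * find a metadata block group that is still comfortably below the
 * fullness factor, starting at the search hint.  if nothing is found,
 * wrap around to search_start, then retry with read-only groups and
 * the fullness restriction relaxed
 */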
u64 btrfs_find_block_group(struct btrfs_root *root,
                           u64 search_start, u64 search_hint, int owner)
{
        struct btrfs_block_group_cache *cache;
        u64 used;
        u64 last = max(search_hint, search_start);
        u64 group_start = 0;
        int full_search = 0;
        int factor = 9;
        int wrapped = 0;
again:
        while (1) {
                cache = btrfs_lookup_first_block_group(root->fs_info, last);
                if (!cache)
                        break;

                spin_lock(&cache->lock);
                last = cache->key.objectid + cache->key.offset;
                used = btrfs_block_group_used(&cache->item);

                if ((full_search || !cache->ro) &&
                    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
                        if (used + cache->pinned + cache->reserved <
                            div_factor(cache->key.offset, factor)) {
                                group_start = cache->key.objectid;
                                spin_unlock(&cache->lock);
                                btrfs_put_block_group(cache);
                                goto found;
                        }
                }
                spin_unlock(&cache->lock);
                btrfs_put_block_group(cache);
                cond_resched();
        }
        if (!wrapped) {
                last = search_start;
                wrapped = 1;
                goto again;
        }
        if (!full_search && factor < 10) {
                last = search_start;
                full_search = 1;
                factor = 10;
                goto again;
        }
found:
        return group_start;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
        int ret;
        struct btrfs_key key;
        struct btrfs_path *path;

        path = btrfs_alloc_path();
        BUG_ON(!path);
        key.objectid = start;
        key.offset = len;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
                                0, 0);
        btrfs_free_path(path);
        return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs.  Implicit back refs are optimized
 * for pointers in non-shared tree blocks.  For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key.  This information allows us to find the block by
 * b-tree searching.  Full back refs are for pointers in tree blocks not
 * referenced by their owner trees.  The location of the tree block is
 * recorded in the back refs.  Actually full back refs are generic, and
 * can be used in all cases where implicit back refs are used.  The major
 * shortcoming of full back refs is their overhead.  Every time a tree
 * block gets COWed, we have to update the back refs entry for all
 * pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for
 * pointers in it.  This means most tree related operations only involve
 * implicit back refs.  For a tree block created in an old transaction,
 * the only way to drop a reference to it is to COW it.  So we can detect
 * the event that a tree block loses its owner tree's reference and do
 * the back refs conversion.
 *
 * When a tree block is COWed through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree.  Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree.  In this case, full back refs are used for pointers
 * in the block.  Remove these full back refs, and add implicit back refs
 * for every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree.  In this case, implicit back refs are used for
 * pointers in the block.  Add full back refs for every pointer in the
 * block, and increase lower level extents' reference counts.  The
 * original implicit back refs are carried over to the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree.  Add implicit back refs for every pointer
 * in the new block, and increase lower level extents' reference counts.
 *
 * Back reference key composition:
 *
 * The key objectid corresponds to the first byte in the extent,
 * the key type is used to differentiate between types of back refs,
 * and the meaning of the key offset depends on the type of back ref.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is the hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has a field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf.
 *
 * When a file extent is allocated, the implicit back refs are used
 * and the fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed during file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * consist only of a key.  The key offset for the implicit back refs is
 * the objectid of the block's owner tree.  The key offset for the full
 * back refs is the first byte of the parent block.
 *
 * When implicit back refs are used, information about the lowest key and
 * level of the tree block is required.  This information is stored in
 * the tree block info structure.
 */
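/*
 * Illustrative sketch with hypothetical values (not taken from the code
 * below): a data extent at bytenr 136839168 referenced by inode 257 at
 * file offset 0 in the fs tree (root 5) carries an implicit back ref
 * keyed as
 *
 *     (136839168, BTRFS_EXTENT_DATA_REF_KEY, hash(5, 257, 0))
 *
 * where the hash is computed by hash_extent_data_ref() below.  If the
 * same extent were instead referenced through a shared leaf at bytenr
 * 30531584, the full back ref would be keyed as
 *
 *     (136839168, BTRFS_SHARED_DATA_REF_KEY, 30531584)
 */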

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
                                  struct btrfs_path *path,
                                  u64 owner, u32 extra_size)
{
        struct btrfs_extent_item *item;
        struct btrfs_extent_item_v0 *ei0;
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_tree_block_info *bi;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        struct btrfs_key found_key;
        u32 new_size = sizeof(*item);
        u64 refs;
        int ret;

        leaf = path->nodes[0];
        BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        ei0 = btrfs_item_ptr(leaf, path->slots[0],
                             struct btrfs_extent_item_v0);
        refs = btrfs_extent_refs_v0(leaf, ei0);

        if (owner == (u64)-1) {
                while (1) {
                        if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                                ret = btrfs_next_leaf(root, path);
                                if (ret < 0)
                                        return ret;
                                BUG_ON(ret > 0);
                                leaf = path->nodes[0];
                        }
                        btrfs_item_key_to_cpu(leaf, &found_key,
                                              path->slots[0]);
                        BUG_ON(key.objectid != found_key.objectid);
                        if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
                                path->slots[0]++;
                                continue;
                        }
                        ref0 = btrfs_item_ptr(leaf, path->slots[0],
                                              struct btrfs_extent_ref_v0);
                        owner = btrfs_ref_objectid_v0(leaf, ref0);
                        break;
                }
        }
        btrfs_release_path(root, path);

        if (owner < BTRFS_FIRST_FREE_OBJECTID)
                new_size += sizeof(*bi);

        new_size -= sizeof(*ei0);
        ret = btrfs_search_slot(trans, root, &key, path,
                                new_size + extra_size, 1);
        if (ret < 0)
                return ret;
        BUG_ON(ret);

        ret = btrfs_extend_item(trans, root, path, new_size);
        BUG_ON(ret);

        leaf = path->nodes[0];
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        btrfs_set_extent_refs(leaf, item, refs);
        /* FIXME: get real generation */
        btrfs_set_extent_generation(leaf, item, 0);
        if (owner < BTRFS_FIRST_FREE_OBJECTID) {
                btrfs_set_extent_flags(leaf, item,
                                       BTRFS_EXTENT_FLAG_TREE_BLOCK |
                                       BTRFS_BLOCK_FLAG_FULL_BACKREF);
                bi = (struct btrfs_tree_block_info *)(item + 1);
                /* FIXME: get first key of the block */
                memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
                btrfs_set_tree_block_level(leaf, bi, (int)owner);
        } else {
                btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
        }
        btrfs_mark_buffer_dirty(leaf);
        return 0;
}
#endif

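/*
 * hash the (root objectid, inode objectid, file offset) triple that
 * identifies an implicit data back ref.  the root goes into the high
 * crc, the owner and offset into the low crc.  note the high half is
 * shifted by 31, not 32; the result is stored as a key offset on disk,
 * so the exact function must not change.
 */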
static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
        u32 high_crc = ~(u32)0;
        u32 low_crc = ~(u32)0;
        __le64 lenum;

        lenum = cpu_to_le64(root_objectid);
        high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
        lenum = cpu_to_le64(owner);
        low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
        lenum = cpu_to_le64(offset);
        low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

        return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
                                     struct btrfs_extent_data_ref *ref)
{
        return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
                                    btrfs_extent_data_ref_objectid(leaf, ref),
                                    btrfs_extent_data_ref_offset(leaf, ref));
}

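/*
 * return 1 if the on-disk extent data ref matches the given
 * root/owner/offset triple, 0 otherwise
 */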
static int match_extent_data_ref(struct extent_buffer *leaf,
                                 struct btrfs_extent_data_ref *ref,
                                 u64 root_objectid, u64 owner, u64 offset)
{
        if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
            btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
            btrfs_extent_data_ref_offset(leaf, ref) != offset)
                return 0;
        return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
                                           struct btrfs_root *root,
                                           struct btrfs_path *path,
                                           u64 bytenr, u64 parent,
                                           u64 root_objectid,
                                           u64 owner, u64 offset)
{
        struct btrfs_key key;
        struct btrfs_extent_data_ref *ref;
        struct extent_buffer *leaf;
        u32 nritems;
        int ret;
        int recow;
        int err = -ENOENT;

        key.objectid = bytenr;
        if (parent) {
                key.type = BTRFS_SHARED_DATA_REF_KEY;
                key.offset = parent;
        } else {
                key.type = BTRFS_EXTENT_DATA_REF_KEY;
                key.offset = hash_extent_data_ref(root_objectid,
                                                  owner, offset);
        }
again:
        recow = 0;
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0) {
                err = ret;
                goto fail;
        }

        if (parent) {
                if (!ret)
                        return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                key.type = BTRFS_EXTENT_REF_V0_KEY;
                btrfs_release_path(root, path);
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret < 0) {
                        err = ret;
                        goto fail;
                }
                if (!ret)
                        return 0;
#endif
                goto fail;
        }

        leaf = path->nodes[0];
        nritems = btrfs_header_nritems(leaf);
        while (1) {
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                err = ret;
                        if (ret)
                                goto fail;

                        leaf = path->nodes[0];
                        nritems = btrfs_header_nritems(leaf);
                        recow = 1;
                }

                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid != bytenr ||
                    key.type != BTRFS_EXTENT_DATA_REF_KEY)
                        goto fail;

                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_data_ref);

                if (match_extent_data_ref(leaf, ref, root_objectid,
                                          owner, offset)) {
                        if (recow) {
                                btrfs_release_path(root, path);
                                goto again;
                        }
                        err = 0;
                        break;
                }
                path->slots[0]++;
        }
fail:
        return err;
}

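/*
 * insert a new data back ref item, or bump the count of an existing one.
 * for implicit refs, hash collisions are resolved by probing forward
 * through adjacent key offsets until a match or an empty slot is found
 */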
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
                                           struct btrfs_root *root,
                                           struct btrfs_path *path,
                                           u64 bytenr, u64 parent,
                                           u64 root_objectid, u64 owner,
                                           u64 offset, int refs_to_add)
{
        struct btrfs_key key;
        struct extent_buffer *leaf;
        u32 size;
        u32 num_refs;
        int ret;

        key.objectid = bytenr;
        if (parent) {
                key.type = BTRFS_SHARED_DATA_REF_KEY;
                key.offset = parent;
                size = sizeof(struct btrfs_shared_data_ref);
        } else {
                key.type = BTRFS_EXTENT_DATA_REF_KEY;
                key.offset = hash_extent_data_ref(root_objectid,
                                                  owner, offset);
                size = sizeof(struct btrfs_extent_data_ref);
        }

        ret = btrfs_insert_empty_item(trans, root, path, &key, size);
        if (ret && ret != -EEXIST)
                goto fail;

        leaf = path->nodes[0];
        if (parent) {
                struct btrfs_shared_data_ref *ref;
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_shared_data_ref);
                if (ret == 0) {
                        btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
                } else {
                        num_refs = btrfs_shared_data_ref_count(leaf, ref);
                        num_refs += refs_to_add;
                        btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
                }
        } else {
                struct btrfs_extent_data_ref *ref;
                while (ret == -EEXIST) {
                        ref = btrfs_item_ptr(leaf, path->slots[0],
                                             struct btrfs_extent_data_ref);
                        if (match_extent_data_ref(leaf, ref, root_objectid,
                                                  owner, offset))
                                break;
                        btrfs_release_path(root, path);
                        key.offset++;
                        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                                      size);
                        if (ret && ret != -EEXIST)
                                goto fail;

                        leaf = path->nodes[0];
                }
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_data_ref);
                if (ret == 0) {
                        btrfs_set_extent_data_ref_root(leaf, ref,
                                                       root_objectid);
                        btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
                        btrfs_set_extent_data_ref_offset(leaf, ref, offset);
                        btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
                } else {
                        num_refs = btrfs_extent_data_ref_count(leaf, ref);
                        num_refs += refs_to_add;
                        btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
                }
        }
        btrfs_mark_buffer_dirty(leaf);
        ret = 0;
fail:
        btrfs_release_path(root, path);
        return ret;
}

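/*
 * drop refs_to_drop references from the data back ref item the path
 * points at, deleting the item once its count reaches zero
 */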
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
                                           struct btrfs_root *root,
                                           struct btrfs_path *path,
                                           int refs_to_drop)
{
        struct btrfs_key key;
        struct btrfs_extent_data_ref *ref1 = NULL;
        struct btrfs_shared_data_ref *ref2 = NULL;
        struct extent_buffer *leaf;
        u32 num_refs = 0;
        int ret = 0;

        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

        if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
                ref1 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_extent_data_ref);
                num_refs = btrfs_extent_data_ref_count(leaf, ref1);
        } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
                ref2 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_shared_data_ref);
                num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
                struct btrfs_extent_ref_v0 *ref0;
                ref0 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_extent_ref_v0);
                num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
        } else {
                BUG();
        }

        BUG_ON(num_refs < refs_to_drop);
        num_refs -= refs_to_drop;

        if (num_refs == 0) {
                ret = btrfs_del_item(trans, root, path);
        } else {
                if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
                        btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
                else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
                        btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                else {
                        struct btrfs_extent_ref_v0 *ref0;
                        ref0 = btrfs_item_ptr(leaf, path->slots[0],
                                        struct btrfs_extent_ref_v0);
                        btrfs_set_ref_count_v0(leaf, ref0, num_refs);
                }
#endif
                btrfs_mark_buffer_dirty(leaf);
        }
        return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          struct btrfs_extent_inline_ref *iref)
{
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_extent_data_ref *ref1;
        struct btrfs_shared_data_ref *ref2;
        u32 num_refs = 0;

        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        if (iref) {
                if (btrfs_extent_inline_ref_type(leaf, iref) ==
                    BTRFS_EXTENT_DATA_REF_KEY) {
                        ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
                        num_refs = btrfs_extent_data_ref_count(leaf, ref1);
                } else {
                        ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
                        num_refs = btrfs_shared_data_ref_count(leaf, ref2);
                }
        } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
                ref1 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_extent_data_ref);
                num_refs = btrfs_extent_data_ref_count(leaf, ref1);
        } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
                ref2 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_shared_data_ref);
                num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
                struct btrfs_extent_ref_v0 *ref0;
                ref0 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_extent_ref_v0);
                num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
        } else {
                WARN_ON(1);
        }
        return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 root_objectid)
{
        struct btrfs_key key;
        int ret;

        key.objectid = bytenr;
        if (parent) {
                key.type = BTRFS_SHARED_BLOCK_REF_KEY;
                key.offset = parent;
        } else {
                key.type = BTRFS_TREE_BLOCK_REF_KEY;
                key.offset = root_objectid;
        }

        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret > 0)
                ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        if (ret == -ENOENT && parent) {
                btrfs_release_path(root, path);
                key.type = BTRFS_EXTENT_REF_V0_KEY;
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret > 0)
                        ret = -ENOENT;
        }
#endif
        return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 root_objectid)
{
        struct btrfs_key key;
        int ret;

        key.objectid = bytenr;
        if (parent) {
                key.type = BTRFS_SHARED_BLOCK_REF_KEY;
                key.offset = parent;
        } else {
                key.type = BTRFS_TREE_BLOCK_REF_KEY;
                key.offset = root_objectid;
        }

        ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
        btrfs_release_path(root, path);
        return ret;
}

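/*
 * pick the back ref key type: tree block vs data extent based on the
 * owner, and shared (keyed by parent block) vs implicit (keyed by the
 * owner tree or the ref hash) based on whether a parent was given
 */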
static inline int extent_ref_type(u64 parent, u64 owner)
{
        int type;
        if (owner < BTRFS_FIRST_FREE_OBJECTID) {
                if (parent > 0)
                        type = BTRFS_SHARED_BLOCK_REF_KEY;
                else
                        type = BTRFS_TREE_BLOCK_REF_KEY;
        } else {
                if (parent > 0)
                        type = BTRFS_SHARED_DATA_REF_KEY;
                else
                        type = BTRFS_EXTENT_DATA_REF_KEY;
        }
        return type;
}

static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
{
        int level;
        BUG_ON(!path->keep_locks);
        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
                if (!path->nodes[level])
                        break;
                btrfs_assert_tree_locked(path->nodes[level]);
                if (path->slots[level] + 1 >=
                    btrfs_header_nritems(path->nodes[level]))
                        continue;
                if (level == 0)
                        btrfs_item_key_to_cpu(path->nodes[level], key,
                                              path->slots[level] + 1);
                else
                        btrfs_node_key_to_cpu(path->nodes[level], key,
                                              path->slots[level] + 1);
                return 0;
        }
        return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *       items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref **ref_ret,
                                 u64 bytenr, u64 num_bytes,
                                 u64 parent, u64 root_objectid,
                                 u64 owner, u64 offset, int insert)
{
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        u64 flags;
        u64 item_size;
        unsigned long ptr;
        unsigned long end;
        int extra_size;
        int type;
        int want;
        int ret;
        int err = 0;

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = num_bytes;

        want = extent_ref_type(parent, owner);
        if (insert) {
                extra_size = btrfs_extent_inline_ref_size(want);
                path->keep_locks = 1;
        } else
                extra_size = -1;
        ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
        if (ret < 0) {
                err = ret;
                goto out;
        }
        BUG_ON(ret);

        leaf = path->nodes[0];
        item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        if (item_size < sizeof(*ei)) {
                if (!insert) {
                        err = -ENOENT;
                        goto out;
                }
                ret = convert_extent_item_v0(trans, root, path, owner,
                                             extra_size);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                leaf = path->nodes[0];
                item_size = btrfs_item_size_nr(leaf, path->slots[0]);
        }
#endif
        BUG_ON(item_size < sizeof(*ei));

        ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        flags = btrfs_extent_flags(leaf, ei);

        ptr = (unsigned long)(ei + 1);
        end = (unsigned long)ei + item_size;

        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                ptr += sizeof(struct btrfs_tree_block_info);
                BUG_ON(ptr > end);
        } else {
                BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
        }

        err = -ENOENT;
        while (1) {
                if (ptr >= end) {
                        WARN_ON(ptr > end);
                        break;
                }
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(leaf, iref);
                if (want < type)
                        break;
                if (want > type) {
                        ptr += btrfs_extent_inline_ref_size(type);
                        continue;
                }

                if (type == BTRFS_EXTENT_DATA_REF_KEY) {
                        struct btrfs_extent_data_ref *dref;
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        if (match_extent_data_ref(leaf, dref, root_objectid,
                                                  owner, offset)) {
                                err = 0;
                                break;
                        }
                        if (hash_extent_data_ref_item(leaf, dref) <
                            hash_extent_data_ref(root_objectid, owner, offset))
                                break;
                } else {
                        u64 ref_offset;
                        ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
                        if (parent > 0) {
                                if (parent == ref_offset) {
                                        err = 0;
                                        break;
                                }
                                if (ref_offset < parent)
                                        break;
                        } else {
                                if (root_objectid == ref_offset) {
                                        err = 0;
                                        break;
                                }
                                if (ref_offset < root_objectid)
                                        break;
                        }
                }
                ptr += btrfs_extent_inline_ref_size(type);
        }
        if (err == -ENOENT && insert) {
                if (item_size + extra_size >=
                    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
                        err = -EAGAIN;
                        goto out;
                }
                /*
                 * To add new inline back ref, we have to make sure
                 * there is no corresponding back ref item.
                 * For simplicity, we just do not add new inline back
                 * ref if there is any kind of item for this block
                 */
                if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
                    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
                        err = -EAGAIN;
                        goto out;
                }
        }
        *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
        if (insert) {
                path->keep_locks = 0;
                btrfs_unlock_up_safe(path, 1);
        }
        return err;
}

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                struct btrfs_path *path,
                                struct btrfs_extent_inline_ref *iref,
                                u64 parent, u64 root_objectid,
                                u64 owner, u64 offset, int refs_to_add,
                                struct btrfs_delayed_extent_op *extent_op)
{
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        unsigned long ptr;
        unsigned long end;
        unsigned long item_offset;
        u64 refs;
        int size;
        int type;
        int ret;

        leaf = path->nodes[0];
        ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        item_offset = (unsigned long)iref - (unsigned long)ei;

        type = extent_ref_type(parent, owner);
        size = btrfs_extent_inline_ref_size(type);

        ret = btrfs_extend_item(trans, root, path, size);
        BUG_ON(ret);

        ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, ei);
        refs += refs_to_add;
        btrfs_set_extent_refs(leaf, ei, refs);
        if (extent_op)
                __run_delayed_extent_op(extent_op, leaf, ei);

        ptr = (unsigned long)ei + item_offset;
        end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
        if (ptr < end - size)
                memmove_extent_buffer(leaf, ptr + size, ptr,
                                      end - size - ptr);

        iref = (struct btrfs_extent_inline_ref *)ptr;
        btrfs_set_extent_inline_ref_type(leaf, iref, type);
        if (type == BTRFS_EXTENT_DATA_REF_KEY) {
                struct btrfs_extent_data_ref *dref;
                dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
                btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
                btrfs_set_extent_data_ref_offset(leaf, dref, offset);
                btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
        } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
                struct btrfs_shared_data_ref *sref;
                sref = (struct btrfs_shared_data_ref *)(iref + 1);
                btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
                btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
        } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
                btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
        } else {
                btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
        }
        btrfs_mark_buffer_dirty(leaf);
        return 0;
}

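/*
 * look up a back ref for the given extent, trying the inline form
 * first and falling back to a separate keyed back ref item
 */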
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref **ref_ret,
                                 u64 bytenr, u64 num_bytes, u64 parent,
                                 u64 root_objectid, u64 owner, u64 offset)
{
        int ret;

        ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
                                           bytenr, num_bytes, parent,
                                           root_objectid, owner, offset, 0);
        if (ret != -ENOENT)
                return ret;

        btrfs_release_path(root, path);
        *ref_ret = NULL;

        if (owner < BTRFS_FIRST_FREE_OBJECTID) {
                ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
                                            root_objectid);
        } else {
                ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
                                             root_objectid, owner, offset);
        }
        return ret;
}

/*
 * helper to update/remove inline back ref
 */
static noinline_for_stack
int update_inline_extent_backref(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref *iref,
                                 int refs_to_mod,
                                 struct btrfs_delayed_extent_op *extent_op)
{
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_extent_data_ref *dref = NULL;
        struct btrfs_shared_data_ref *sref = NULL;
        unsigned long ptr;
        unsigned long end;
        u32 item_size;
        int size;
        int type;
        int ret;
        u64 refs;

        leaf = path->nodes[0];
        ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, ei);
        WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
        refs += refs_to_mod;
        btrfs_set_extent_refs(leaf, ei, refs);
        if (extent_op)
                __run_delayed_extent_op(extent_op, leaf, ei);

        type = btrfs_extent_inline_ref_type(leaf, iref);

        if (type == BTRFS_EXTENT_DATA_REF_KEY) {
                dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                refs = btrfs_extent_data_ref_count(leaf, dref);
        } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
                sref = (struct btrfs_shared_data_ref *)(iref + 1);
                refs = btrfs_shared_data_ref_count(leaf, sref);
        } else {
                refs = 1;
                BUG_ON(refs_to_mod != -1);
        }

        BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
        refs += refs_to_mod;

        if (refs > 0) {
                if (type == BTRFS_EXTENT_DATA_REF_KEY)
                        btrfs_set_extent_data_ref_count(leaf, dref, refs);
                else
                        btrfs_set_shared_data_ref_count(leaf, sref, refs);
        } else {
                size = btrfs_extent_inline_ref_size(type);
                item_size = btrfs_item_size_nr(leaf, path->slots[0]);
                ptr = (unsigned long)iref;
                end = (unsigned long)ei + item_size;
                if (ptr + size < end)
                        memmove_extent_buffer(leaf, ptr, ptr + size,
                                              end - ptr - size);
                item_size -= size;
                ret = btrfs_truncate_item(trans, root, path, item_size, 1);
                BUG_ON(ret);
        }
        btrfs_mark_buffer_dirty(leaf);
        return 0;
}

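/*
 * add refs_to_add references for an extent: update the inline back ref
 * in place if one exists, create a new inline back ref on -ENOENT, and
 * pass -EAGAIN through when the ref must go into a separate keyed item
 */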
static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
                                 u64 bytenr, u64 num_bytes, u64 parent,
                                 u64 root_objectid, u64 owner,
                                 u64 offset, int refs_to_add,
                                 struct btrfs_delayed_extent_op *extent_op)
{
        struct btrfs_extent_inline_ref *iref;
        int ret;

        ret = lookup_inline_extent_backref(trans, root, path, &iref,
                                           bytenr, num_bytes, parent,
                                           root_objectid, owner, offset, 1);
        if (ret == 0) {
                BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
                ret = update_inline_extent_backref(trans, root, path, iref,
                                                   refs_to_add, extent_op);
        } else if (ret == -ENOENT) {
                ret = setup_inline_extent_backref(trans, root, path, iref,
                                                  parent, root_objectid,
                                                  owner, offset, refs_to_add,
                                                  extent_op);
        }
        return ret;
}

1369 static int insert_extent_backref(struct btrfs_trans_handle *trans,
1370                                  struct btrfs_root *root,
1371                                  struct btrfs_path *path,
1372                                  u64 bytenr, u64 parent, u64 root_objectid,
1373                                  u64 owner, u64 offset, int refs_to_add)
1374 {
1375         int ret;
1376         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1377                 BUG_ON(refs_to_add != 1);
1378                 ret = insert_tree_block_ref(trans, root, path, bytenr,
1379                                             parent, root_objectid);
1380         } else {
1381                 ret = insert_extent_data_ref(trans, root, path, bytenr,
1382                                              parent, root_objectid,
1383                                              owner, offset, refs_to_add);
1384         }
1385         return ret;
1386 }
1387
1388 static int remove_extent_backref(struct btrfs_trans_handle *trans,
1389                                  struct btrfs_root *root,
1390                                  struct btrfs_path *path,
1391                                  struct btrfs_extent_inline_ref *iref,
1392                                  int refs_to_drop, int is_data)
1393 {
1394         int ret;
1395
1396         BUG_ON(!is_data && refs_to_drop != 1);
1397         if (iref) {
1398                 ret = update_inline_extent_backref(trans, root, path, iref,
1399                                                    -refs_to_drop, NULL);
1400         } else if (is_data) {
1401                 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1402         } else {
1403                 ret = btrfs_del_item(trans, root, path);
1404         }
1405         return ret;
1406 }
1407
1408 #ifdef BIO_RW_DISCARD
1409 static void btrfs_issue_discard(struct block_device *bdev,
1410                                 u64 start, u64 len)
1411 {
1412         blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
1413 }
1414 #endif
1415
1416 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1417                                 u64 num_bytes)
1418 {
1419 #ifdef BIO_RW_DISCARD
1420         int ret;
1421         u64 map_length = num_bytes;
1422         struct btrfs_multi_bio *multi = NULL;
1423
1424         /* Tell the block device(s) that the sectors can be discarded */
1425         ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
1426                               bytenr, &map_length, &multi, 0);
1427         if (!ret) {
1428                 struct btrfs_bio_stripe *stripe = multi->stripes;
1429                 int i;
1430
1431                 if (map_length > num_bytes)
1432                         map_length = num_bytes;
1433
1434                 for (i = 0; i < multi->num_stripes; i++, stripe++) {
1435                         btrfs_issue_discard(stripe->dev->bdev,
1436                                             stripe->physical,
1437                                             map_length);
1438                 }
1439                 kfree(multi);
1440         }
1441
1442         return ret;
1443 #else
1444         return 0;
1445 #endif
1446 }
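
/*
 * Note the >> 9 in btrfs_issue_discard(): blkdev_issue_discard() takes
 * 512-byte sectors, so byte offsets and lengths are converted by
 * dividing by 512.  Worked example (values hypothetical):
 *
 *     u64 start = 1048576;            // byte offset on the device
 *     u64 len   = 65536;              // byte length
 *     sector_t sector = start >> 9;   // 2048 sectors
 *     sector_t nr     = len >> 9;     // 128 sectors
 */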
1447
1448 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1449                          struct btrfs_root *root,
1450                          u64 bytenr, u64 num_bytes, u64 parent,
1451                          u64 root_objectid, u64 owner, u64 offset)
1452 {
1453         int ret;
1454         BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
1455                root_objectid == BTRFS_TREE_LOG_OBJECTID);
1456
1457         if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1458                 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
1459                                         parent, root_objectid, (int)owner,
1460                                         BTRFS_ADD_DELAYED_REF, NULL);
1461         } else {
1462                 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
1463                                         parent, root_objectid, owner, offset,
1464                                         BTRFS_ADD_DELAYED_REF, NULL);
1465         }
1466         return ret;
1467 }
1468
1469 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1470                                   struct btrfs_root *root,
1471                                   u64 bytenr, u64 num_bytes,
1472                                   u64 parent, u64 root_objectid,
1473                                   u64 owner, u64 offset, int refs_to_add,
1474                                   struct btrfs_delayed_extent_op *extent_op)
1475 {
1476         struct btrfs_path *path;
1477         struct extent_buffer *leaf;
1478         struct btrfs_extent_item *item;
1479         u64 refs;
1480         int ret;
1481         int err = 0;
1482
1483         path = btrfs_alloc_path();
1484         if (!path)
1485                 return -ENOMEM;
1486
1487         path->reada = 1;
1488         path->leave_spinning = 1;
1489         /* this will set up the path even if it fails to insert the back ref */
1490         ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
1491                                            path, bytenr, num_bytes, parent,
1492                                            root_objectid, owner, offset,
1493                                            refs_to_add, extent_op);
1494         if (ret == 0)
1495                 goto out;
1496
1497         if (ret != -EAGAIN) {
1498                 err = ret;
1499                 goto out;
1500         }
1501
1502         leaf = path->nodes[0];
1503         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1504         refs = btrfs_extent_refs(leaf, item);
1505         btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1506         if (extent_op)
1507                 __run_delayed_extent_op(extent_op, leaf, item);
1508
1509         btrfs_mark_buffer_dirty(leaf);
1510         btrfs_release_path(root->fs_info->extent_root, path);
1511
1512         path->reada = 1;
1513         path->leave_spinning = 1;
1514
1515         /* now insert the actual backref */
1516         ret = insert_extent_backref(trans, root->fs_info->extent_root,
1517                                     path, bytenr, parent, root_objectid,
1518                                     owner, offset, refs_to_add);
1519         BUG_ON(ret);
1520 out:
1521         btrfs_free_path(path);
1522         return err;
1523 }
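
/*
 * Hedged sketch of the control flow above: the inline insert either
 * fully succeeds, fails hard, or (with -EAGAIN) leaves the path
 * positioned on the extent item so the refcount can be bumped and a
 * separate keyed backref inserted afterwards:
 *
 *     ret = insert_inline_extent_backref(...);
 *     if (ret == 0)
 *             goto out;               // inline ref added or updated
 *     if (ret != -EAGAIN)
 *             goto out;               // hard failure
 *     // bump the refcount shown in the extent item, release the
 *     // path, then add a keyed backref via insert_extent_backref()
 */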
1524
1525 static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1526                                 struct btrfs_root *root,
1527                                 struct btrfs_delayed_ref_node *node,
1528                                 struct btrfs_delayed_extent_op *extent_op,
1529                                 int insert_reserved)
1530 {
1531         int ret = 0;
1532         struct btrfs_delayed_data_ref *ref;
1533         struct btrfs_key ins;
1534         u64 parent = 0;
1535         u64 ref_root = 0;
1536         u64 flags = 0;
1537
1538         ins.objectid = node->bytenr;
1539         ins.offset = node->num_bytes;
1540         ins.type = BTRFS_EXTENT_ITEM_KEY;
1541
1542         ref = btrfs_delayed_node_to_data_ref(node);
1543         if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1544                 parent = ref->parent;
1545         else
1546                 ref_root = ref->root;
1547
1548         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1549                 if (extent_op) {
1550                         BUG_ON(extent_op->update_key);
1551                         flags |= extent_op->flags_to_set;
1552                 }
1553                 ret = alloc_reserved_file_extent(trans, root,
1554                                                  parent, ref_root, flags,
1555                                                  ref->objectid, ref->offset,
1556                                                  &ins, node->ref_mod);
1557                 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1558         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1559                 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1560                                              node->num_bytes, parent,
1561                                              ref_root, ref->objectid,
1562                                              ref->offset, node->ref_mod,
1563                                              extent_op);
1564         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1565                 ret = __btrfs_free_extent(trans, root, node->bytenr,
1566                                           node->num_bytes, parent,
1567                                           ref_root, ref->objectid,
1568                                           ref->offset, node->ref_mod,
1569                                           extent_op);
1570         } else {
1571                 BUG();
1572         }
1573         return ret;
1574 }
1575
1576 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1577                                     struct extent_buffer *leaf,
1578                                     struct btrfs_extent_item *ei)
1579 {
1580         u64 flags = btrfs_extent_flags(leaf, ei);
1581         if (extent_op->update_flags) {
1582                 flags |= extent_op->flags_to_set;
1583                 btrfs_set_extent_flags(leaf, ei, flags);
1584         }
1585
1586         if (extent_op->update_key) {
1587                 struct btrfs_tree_block_info *bi;
1588                 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1589                 bi = (struct btrfs_tree_block_info *)(ei + 1);
1590                 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1591         }
1592 }
1593
1594 static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1595                                  struct btrfs_root *root,
1596                                  struct btrfs_delayed_ref_node *node,
1597                                  struct btrfs_delayed_extent_op *extent_op)
1598 {
1599         struct btrfs_key key;
1600         struct btrfs_path *path;
1601         struct btrfs_extent_item *ei;
1602         struct extent_buffer *leaf;
1603         u32 item_size;
1604         int ret;
1605         int err = 0;
1606
1607         path = btrfs_alloc_path();
1608         if (!path)
1609                 return -ENOMEM;
1610
1611         key.objectid = node->bytenr;
1612         key.type = BTRFS_EXTENT_ITEM_KEY;
1613         key.offset = node->num_bytes;
1614
1615         path->reada = 1;
1616         path->leave_spinning = 1;
1617         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
1618                                 path, 0, 1);
1619         if (ret < 0) {
1620                 err = ret;
1621                 goto out;
1622         }
1623         if (ret > 0) {
1624                 err = -EIO;
1625                 goto out;
1626         }
1627
1628         leaf = path->nodes[0];
1629         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1630 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1631         if (item_size < sizeof(*ei)) {
1632                 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
1633                                              path, (u64)-1, 0);
1634                 if (ret < 0) {
1635                         err = ret;
1636                         goto out;
1637                 }
1638                 leaf = path->nodes[0];
1639                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1640         }
1641 #endif
1642         BUG_ON(item_size < sizeof(*ei));
1643         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1644         __run_delayed_extent_op(extent_op, leaf, ei);
1645
1646         btrfs_mark_buffer_dirty(leaf);
1647 out:
1648         btrfs_free_path(path);
1649         return err;
1650 }
1651
1652 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1653                                 struct btrfs_root *root,
1654                                 struct btrfs_delayed_ref_node *node,
1655                                 struct btrfs_delayed_extent_op *extent_op,
1656                                 int insert_reserved)
1657 {
1658         int ret = 0;
1659         struct btrfs_delayed_tree_ref *ref;
1660         struct btrfs_key ins;
1661         u64 parent = 0;
1662         u64 ref_root = 0;
1663
1664         ins.objectid = node->bytenr;
1665         ins.offset = node->num_bytes;
1666         ins.type = BTRFS_EXTENT_ITEM_KEY;
1667
1668         ref = btrfs_delayed_node_to_tree_ref(node);
1669         if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1670                 parent = ref->parent;
1671         else
1672                 ref_root = ref->root;
1673
1674         BUG_ON(node->ref_mod != 1);
1675         if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1676                 BUG_ON(!extent_op || !extent_op->update_flags ||
1677                        !extent_op->update_key);
1678                 ret = alloc_reserved_tree_block(trans, root,
1679                                                 parent, ref_root,
1680                                                 extent_op->flags_to_set,
1681                                                 &extent_op->key,
1682                                                 ref->level, &ins);
1683                 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1684         } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1685                 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1686                                              node->num_bytes, parent, ref_root,
1687                                              ref->level, 0, 1, extent_op);
1688         } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1689                 ret = __btrfs_free_extent(trans, root, node->bytenr,
1690                                           node->num_bytes, parent, ref_root,
1691                                           ref->level, 0, 1, extent_op);
1692         } else {
1693                 BUG();
1694         }
1695         return ret;
1696 }
1697
1699 /* helper function to actually process a single delayed ref entry */
1700 static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1701                                struct btrfs_root *root,
1702                                struct btrfs_delayed_ref_node *node,
1703                                struct btrfs_delayed_extent_op *extent_op,
1704                                int insert_reserved)
1705 {
1706         int ret;
1707         if (btrfs_delayed_ref_is_head(node)) {
1708                 struct btrfs_delayed_ref_head *head;
1709                 /*
1710                  * we've hit the end of the chain and we were supposed
1711                  * to insert this extent into the tree.  But it got
1712                  * deleted before we ever needed to insert it, so all
1713                  * we have to do is clean up the accounting
1714                  */
1715                 BUG_ON(extent_op);
1716                 head = btrfs_delayed_node_to_head(node);
1717                 if (insert_reserved) {
1718                         if (head->is_data) {
1719                                 ret = btrfs_del_csums(trans, root,
1720                                                       node->bytenr,
1721                                                       node->num_bytes);
1722                                 BUG_ON(ret);
1723                         }
1724                         btrfs_update_pinned_extents(root, node->bytenr,
1725                                                     node->num_bytes, 1);
1726                         update_reserved_extents(root, node->bytenr,
1727                                                 node->num_bytes, 0);
1728                 }
1729                 mutex_unlock(&head->mutex);
1730                 return 0;
1731         }
1732
1733         if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1734             node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1735                 ret = run_delayed_tree_ref(trans, root, node, extent_op,
1736                                            insert_reserved);
1737         else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1738                  node->type == BTRFS_SHARED_DATA_REF_KEY)
1739                 ret = run_delayed_data_ref(trans, root, node, extent_op,
1740                                            insert_reserved);
1741         else
1742                 BUG();
1743         return ret;
1744 }
1745
1746 static noinline struct btrfs_delayed_ref_node *
1747 select_delayed_ref(struct btrfs_delayed_ref_head *head)
1748 {
1749         struct rb_node *node;
1750         struct btrfs_delayed_ref_node *ref;
1751         int action = BTRFS_ADD_DELAYED_REF;
1752 again:
1753         /*
1754          * select delayed refs of type BTRFS_ADD_DELAYED_REF first.
1755          * this prevents the ref count from going down to zero when
1756          * there are still pending delayed refs.
1757          */
1758         node = rb_prev(&head->node.rb_node);
1759         while (1) {
1760                 if (!node)
1761                         break;
1762                 ref = rb_entry(node, struct btrfs_delayed_ref_node,
1763                                 rb_node);
1764                 if (ref->bytenr != head->node.bytenr)
1765                         break;
1766                 if (ref->action == action)
1767                         return ref;
1768                 node = rb_prev(node);
1769         }
1770         if (action == BTRFS_ADD_DELAYED_REF) {
1771                 action = BTRFS_DROP_DELAYED_REF;
1772                 goto again;
1773         }
1774         return NULL;
1775 }
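
/*
 * The same two-pass preference scan, sketched over a plain array
 * instead of the rbtree (illustrative only; the struct and helper are
 * hypothetical):
 *
 *     struct dref { int action; };
 *
 *     static struct dref *pick(struct dref *refs, int n)
 *     {
 *             int wanted = BTRFS_ADD_DELAYED_REF;
 *     again:
 *             for (int i = 0; i < n; i++)
 *                     if (refs[i].action == wanted)
 *                             return &refs[i];
 *             if (wanted == BTRFS_ADD_DELAYED_REF) {
 *                     wanted = BTRFS_DROP_DELAYED_REF;
 *                     goto again;
 *             }
 *             return NULL;            // only the head node remains
 *     }
 */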
1776
1777 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
1778                                        struct btrfs_root *root,
1779                                        struct list_head *cluster)
1780 {
1781         struct btrfs_delayed_ref_root *delayed_refs;
1782         struct btrfs_delayed_ref_node *ref;
1783         struct btrfs_delayed_ref_head *locked_ref = NULL;
1784         struct btrfs_delayed_extent_op *extent_op;
1785         int ret;
1786         int count = 0;
1787         int must_insert_reserved = 0;
1788
1789         delayed_refs = &trans->transaction->delayed_refs;
1790         while (1) {
1791                 if (!locked_ref) {
1792                         /* pick a new head ref from the cluster list */
1793                         if (list_empty(cluster))
1794                                 break;
1795
1796                         locked_ref = list_entry(cluster->next,
1797                                      struct btrfs_delayed_ref_head, cluster);
1798
1799                         /* grab the lock that says we are going to process
1800                          * all the refs for this head */
1801                         ret = btrfs_delayed_ref_lock(trans, locked_ref);
1802
1803                         /*
1804                          * we may have dropped the spin lock to get the head
1805                          * mutex lock, and that might have given someone else
1806                          * time to free the head.  If that's true, it has been
1807                          * removed from our list and we can move on.
1808                          */
1809                         if (ret == -EAGAIN) {
1810                                 locked_ref = NULL;
1811                                 count++;
1812                                 continue;
1813                         }
1814                 }
1815
1816                 /*
1817                  * record the must insert reserved flag before we
1818                  * drop the spin lock.
1819                  */
1820                 must_insert_reserved = locked_ref->must_insert_reserved;
1821                 locked_ref->must_insert_reserved = 0;
1822
1823                 extent_op = locked_ref->extent_op;
1824                 locked_ref->extent_op = NULL;
1825
1826                 /*
1827                  * locked_ref is the head node, so we have to go one
1828                  * node back for any delayed ref updates
1829                  */
1830                 ref = select_delayed_ref(locked_ref);
1831                 if (!ref) {
1832                         /* All delayed refs have been processed. Go ahead
1833                          * and send the head node to run_one_delayed_ref,
1834                          * so that any accounting fixes can happen
1835                          */
1836                         ref = &locked_ref->node;
1837
1838                         if (extent_op && must_insert_reserved) {
1839                                 kfree(extent_op);
1840                                 extent_op = NULL;
1841                         }
1842
1843                         if (extent_op) {
1844                                 spin_unlock(&delayed_refs->lock);
1845
1846                                 ret = run_delayed_extent_op(trans, root,
1847                                                             ref, extent_op);
1848                                 BUG_ON(ret);
1849                                 kfree(extent_op);
1850
1851                                 cond_resched();
1852                                 spin_lock(&delayed_refs->lock);
1853                                 continue;
1854                         }
1855
1856                         list_del_init(&locked_ref->cluster);
1857                         locked_ref = NULL;
1858                 }
1859
1860                 ref->in_tree = 0;
1861                 rb_erase(&ref->rb_node, &delayed_refs->root);
1862                 delayed_refs->num_entries--;
1863
1864                 spin_unlock(&delayed_refs->lock);
1865
1866                 ret = run_one_delayed_ref(trans, root, ref, extent_op,
1867                                           must_insert_reserved);
1868                 BUG_ON(ret);
1869
1870                 btrfs_put_delayed_ref(ref);
1871                 kfree(extent_op);
1872                 count++;
1873
1874                 cond_resched();
1875                 spin_lock(&delayed_refs->lock);
1876         }
1877         return count;
1878 }
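
/*
 * run_clustered_refs() follows a classic locking shape: list and tree
 * surgery happens under delayed_refs->lock, the lock is dropped for the
 * expensive btree updates, then reacquired before the next iteration.
 * Skeleton of that shape (helpers hypothetical; here the final unlock
 * belongs to the caller):
 *
 *     spin_lock(&lock);
 *     while (have_work()) {
 *             item = detach_next();   // cheap, under the lock
 *             spin_unlock(&lock);
 *             process(item);          // may sleep, may do IO
 *             cond_resched();
 *             spin_lock(&lock);
 *     }
 */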
1879
1880 /*
1881  * this starts processing the delayed reference count updates and
1882  * extent insertions we have queued up so far.  count can be
1883  * 0, which means to process everything in the tree at the start
1884  * of the run (but not newly added entries), or it can be some target
1885  * number you'd like to process.
1886  */
1887 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1888                            struct btrfs_root *root, unsigned long count)
1889 {
1890         struct rb_node *node;
1891         struct btrfs_delayed_ref_root *delayed_refs;
1892         struct btrfs_delayed_ref_node *ref;
1893         struct list_head cluster;
1894         int ret;
1895         int run_all = count == (unsigned long)-1;
1896         int run_most = 0;
1897
1898         if (root == root->fs_info->extent_root)
1899                 root = root->fs_info->tree_root;
1900
1901         delayed_refs = &trans->transaction->delayed_refs;
1902         INIT_LIST_HEAD(&cluster);
1903 again:
1904         spin_lock(&delayed_refs->lock);
1905         if (count == 0) {
1906                 count = delayed_refs->num_entries * 2;
1907                 run_most = 1;
1908         }
1909         while (1) {
1910                 if (!(run_all || run_most) &&
1911                     delayed_refs->num_heads_ready < 64)
1912                         break;
1913
1914                 /*
1915                  * go find something we can process in the rbtree.  We start at
1916                  * the beginning of the tree, and then build a cluster
1917                  * of refs to process starting at the first one we are able to
1918                  * lock
1919                  */
1920                 ret = btrfs_find_ref_cluster(trans, &cluster,
1921                                              delayed_refs->run_delayed_start);
1922                 if (ret)
1923                         break;
1924
1925                 ret = run_clustered_refs(trans, root, &cluster);
1926                 BUG_ON(ret < 0);
1927
1928                 count -= min_t(unsigned long, ret, count);
1929
1930                 if (count == 0)
1931                         break;
1932         }
1933
1934         if (run_all) {
1935                 node = rb_first(&delayed_refs->root);
1936                 if (!node)
1937                         goto out;
1938                 count = (unsigned long)-1;
1939
1940                 while (node) {
1941                         ref = rb_entry(node, struct btrfs_delayed_ref_node,
1942                                        rb_node);
1943                         if (btrfs_delayed_ref_is_head(ref)) {
1944                                 struct btrfs_delayed_ref_head *head;
1945
1946                                 head = btrfs_delayed_node_to_head(ref);
1947                                 atomic_inc(&ref->refs);
1948
1949                                 spin_unlock(&delayed_refs->lock);
1950                                 mutex_lock(&head->mutex);
1951                                 mutex_unlock(&head->mutex);
1952
1953                                 btrfs_put_delayed_ref(ref);
1954                                 cond_resched();
1955                                 goto again;
1956                         }
1957                         node = rb_next(node);
1958                 }
1959                 spin_unlock(&delayed_refs->lock);
1960                 schedule_timeout(1);
1961                 goto again;
1962         }
1963 out:
1964         spin_unlock(&delayed_refs->lock);
1965         return 0;
1966 }
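
/*
 * Hedged usage sketch: commit paths run everything, while routine
 * operations ask for a bounded amount of work so no single caller
 * stalls behind the whole queue:
 *
 *     // process roughly 64 queued ref updates, then return
 *     btrfs_run_delayed_refs(trans, root, 64);
 *
 *     // process every entry, including ones added while running
 *     btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 */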
1967
1968 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
1969                                 struct btrfs_root *root,
1970                                 u64 bytenr, u64 num_bytes, u64 flags,
1971                                 int is_data)
1972 {
1973         struct btrfs_delayed_extent_op *extent_op;
1974         int ret;
1975
1976         extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
1977         if (!extent_op)
1978                 return -ENOMEM;
1979
1980         extent_op->flags_to_set = flags;
1981         extent_op->update_flags = 1;
1982         extent_op->update_key = 0;
1983         extent_op->is_data = is_data ? 1 : 0;
1984
1985         ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
1986         if (ret)
1987                 kfree(extent_op);
1988         return ret;
1989 }
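
/*
 * Hedged usage sketch (flag name from ctree.h; exact call sites vary):
 * marking a tree block's extent as carrying full back refs is queued as
 * a delayed extent op rather than applied synchronously, roughly:
 *
 *     ret = btrfs_set_disk_extent_flags(trans, root, buf->start,
 *                                       buf->len,
 *                                       BTRFS_BLOCK_FLAG_FULL_BACKREF,
 *                                       0);   // is_data == 0: tree block
 */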
1990
1991 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
1992                                       struct btrfs_root *root,
1993                                       struct btrfs_path *path,
1994                                       u64 objectid, u64 offset, u64 bytenr)
1995 {
1996         struct btrfs_delayed_ref_head *head;
1997         struct btrfs_delayed_ref_node *ref;
1998         struct btrfs_delayed_data_ref *data_ref;
1999         struct btrfs_delayed_ref_root *delayed_refs;
2000         struct rb_node *node;
2001         int ret = 0;
2002
2003         ret = -ENOENT;
2004         delayed_refs = &trans->transaction->delayed_refs;
2005         spin_lock(&delayed_refs->lock);
2006         head = btrfs_find_delayed_ref_head(trans, bytenr);
2007         if (!head)
2008                 goto out;
2009
2010         if (!mutex_trylock(&head->mutex)) {
2011                 atomic_inc(&head->node.refs);
2012                 spin_unlock(&delayed_refs->lock);
2013
2014                 btrfs_release_path(root->fs_info->extent_root, path);
2015
2016                 mutex_lock(&head->mutex);
2017                 mutex_unlock(&head->mutex);
2018                 btrfs_put_delayed_ref(&head->node);
2019                 return -EAGAIN;
2020         }
2021
2022         node = rb_prev(&head->node.rb_node);
2023         if (!node)
2024                 goto out_unlock;
2025
2026         ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2027
2028         if (ref->bytenr != bytenr)
2029                 goto out_unlock;
2030
2031         ret = 1;
2032         if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2033                 goto out_unlock;
2034
2035         data_ref = btrfs_delayed_node_to_data_ref(ref);
2036
2037         node = rb_prev(node);
2038         if (node) {
2039                 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2040                 if (ref->bytenr == bytenr)
2041                         goto out_unlock;
2042         }
2043
2044         if (data_ref->root != root->root_key.objectid ||
2045             data_ref->objectid != objectid || data_ref->offset != offset)
2046                 goto out_unlock;
2047
2048         ret = 0;
2049 out_unlock:
2050         mutex_unlock(&head->mutex);
2051 out:
2052         spin_unlock(&delayed_refs->lock);
2053         return ret;
2054 }
2055
2056 static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2057                                         struct btrfs_root *root,
2058                                         struct btrfs_path *path,
2059                                         u64 objectid, u64 offset, u64 bytenr)
2060 {
2061         struct btrfs_root *extent_root = root->fs_info->extent_root;
2062         struct extent_buffer *leaf;
2063         struct btrfs_extent_data_ref *ref;
2064         struct btrfs_extent_inline_ref *iref;
2065         struct btrfs_extent_item *ei;
2066         struct btrfs_key key;
2067         u32 item_size;
2068         int ret;
2069
2070         key.objectid = bytenr;
2071         key.offset = (u64)-1;
2072         key.type = BTRFS_EXTENT_ITEM_KEY;
2073
2074         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2075         if (ret < 0)
2076                 goto out;
2077         BUG_ON(ret == 0);
2078
2079         ret = -ENOENT;
2080         if (path->slots[0] == 0)
2081                 goto out;
2082
2083         path->slots[0]--;
2084         leaf = path->nodes[0];
2085         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2086
2087         if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2088                 goto out;
2089
2090         ret = 1;
2091         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2092 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2093         if (item_size < sizeof(*ei)) {
2094                 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2095                 goto out;
2096         }
2097 #endif
2098         ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2099
2100         if (item_size != sizeof(*ei) +
2101             btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2102                 goto out;
2103
2104         if (btrfs_extent_generation(leaf, ei) <=
2105             btrfs_root_last_snapshot(&root->root_item))
2106                 goto out;
2107
2108         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2109         if (btrfs_extent_inline_ref_type(leaf, iref) !=
2110             BTRFS_EXTENT_DATA_REF_KEY)
2111                 goto out;
2112
2113         ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2114         if (btrfs_extent_refs(leaf, ei) !=
2115             btrfs_extent_data_ref_count(leaf, ref) ||
2116             btrfs_extent_data_ref_root(leaf, ref) !=
2117             root->root_key.objectid ||
2118             btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2119             btrfs_extent_data_ref_offset(leaf, ref) != offset)
2120                 goto out;
2121
2122         ret = 0;
2123 out:
2124         return ret;
2125 }
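
/*
 * The lookup above relies on a common btree idiom: search for
 * (bytenr, EXTENT_ITEM, (u64)-1), a key that cannot exist, so the
 * search lands just past any item for this bytenr; stepping
 * path->slots[0] back by one then yields the candidate.  The same idea
 * over a sorted array (illustrative only):
 *
 *     static int upper_bound(const u64 *a, int n, u64 key)
 *     {
 *             int lo = 0, hi = n;
 *             while (lo < hi) {
 *                     int mid = lo + (hi - lo) / 2;
 *                     if (a[mid] <= key)
 *                             lo = mid + 1;
 *                     else
 *                             hi = mid;
 *             }
 *             return lo;      // candidate match, if any, is at lo - 1
 *     }
 */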
2126
2127 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2128                           struct btrfs_root *root,
2129                           u64 objectid, u64 offset, u64 bytenr)
2130 {
2131         struct btrfs_path *path;
2132         int ret;
2133         int ret2;
2134
2135         path = btrfs_alloc_path();
2136         if (!path)
2137                 return -ENOMEM;
2138
2139         do {
2140                 ret = check_committed_ref(trans, root, path, objectid,
2141                                           offset, bytenr);
2142                 if (ret && ret != -ENOENT)
2143                         goto out;
2144
2145                 ret2 = check_delayed_ref(trans, root, path, objectid,
2146                                          offset, bytenr);
2147         } while (ret2 == -EAGAIN);
2148
2149         if (ret2 && ret2 != -ENOENT) {
2150                 ret = ret2;
2151                 goto out;
2152         }
2153
2154         if (ret != -ENOENT || ret2 != -ENOENT)
2155                 ret = 0;
2156 out:
2157         btrfs_free_path(path);
2158         return ret;
2159 }
2160
2161 #if 0
2162 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2163                     struct extent_buffer *buf, u32 nr_extents)
2164 {
2165         struct btrfs_key key;
2166         struct btrfs_file_extent_item *fi;
2167         u64 root_gen;
2168         u32 nritems;
2169         int i;
2170         int level;
2171         int ret = 0;
2172         int shared = 0;
2173
2174         if (!root->ref_cows)
2175                 return 0;
2176
2177         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2178                 shared = 0;
2179                 root_gen = root->root_key.offset;
2180         } else {
2181                 shared = 1;
2182                 root_gen = trans->transid - 1;
2183         }
2184
2185         level = btrfs_header_level(buf);
2186         nritems = btrfs_header_nritems(buf);
2187
2188         if (level == 0) {
2189                 struct btrfs_leaf_ref *ref;
2190                 struct btrfs_extent_info *info;
2191
2192                 ref = btrfs_alloc_leaf_ref(root, nr_extents);
2193                 if (!ref) {
2194                         ret = -ENOMEM;
2195                         goto out;
2196                 }
2197
2198                 ref->root_gen = root_gen;
2199                 ref->bytenr = buf->start;
2200                 ref->owner = btrfs_header_owner(buf);
2201                 ref->generation = btrfs_header_generation(buf);
2202                 ref->nritems = nr_extents;
2203                 info = ref->extents;
2204
2205                 for (i = 0; nr_extents > 0 && i < nritems; i++) {
2206                         u64 disk_bytenr;
2207                         btrfs_item_key_to_cpu(buf, &key, i);
2208                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2209                                 continue;
2210                         fi = btrfs_item_ptr(buf, i,
2211                                             struct btrfs_file_extent_item);
2212                         if (btrfs_file_extent_type(buf, fi) ==
2213                             BTRFS_FILE_EXTENT_INLINE)
2214                                 continue;
2215                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2216                         if (disk_bytenr == 0)
2217                                 continue;
2218
2219                         info->bytenr = disk_bytenr;
2220                         info->num_bytes =
2221                                 btrfs_file_extent_disk_num_bytes(buf, fi);
2222                         info->objectid = key.objectid;
2223                         info->offset = key.offset;
2224                         info++;
2225                 }
2226
2227                 ret = btrfs_add_leaf_ref(root, ref, shared);
2228                 if (ret == -EEXIST && shared) {
2229                         struct btrfs_leaf_ref *old;
2230                         old = btrfs_lookup_leaf_ref(root, ref->bytenr);
2231                         BUG_ON(!old);
2232                         btrfs_remove_leaf_ref(root, old);
2233                         btrfs_free_leaf_ref(root, old);
2234                         ret = btrfs_add_leaf_ref(root, ref, shared);
2235                 }
2236                 WARN_ON(ret);
2237                 btrfs_free_leaf_ref(root, ref);
2238         }
2239 out:
2240         return ret;
2241 }
2242
2243 /* when a block goes through cow, we update the reference counts of
2244  * everything that block points to.  The internal pointers of the block
2245  * can be in just about any order, and it is likely to have clusters of
2246  * things that are close together and clusters of things that are not.
2247  *
2248  * To help reduce the seeks that come with updating all of these reference
2249  * counts, sort them by byte number before actual updates are done.
2250  *
2251  * struct refsort is used to match byte number to slot in the btree block.
2252  * we sort based on the byte number and then use the slot to actually
2253  * find the item.
2254  *
2255  * struct refsort is smaller than struct btrfs_item and smaller than
2256  * struct btrfs_key_ptr.  Since we're currently limited to the page size
2257  * for a btree block, there's no way for a kmalloc of refsorts for a
2258  * single node to be bigger than a page.
2259  */
2260 struct refsort {
2261         u64 bytenr;
2262         u32 slot;
2263 };
2264
2265 /*
2266  * for passing into sort()
2267  */
2268 static int refsort_cmp(const void *a_void, const void *b_void)
2269 {
2270         const struct refsort *a = a_void;
2271         const struct refsort *b = b_void;
2272
2273         if (a->bytenr < b->bytenr)
2274                 return -1;
2275         if (a->bytenr > b->bytenr)
2276                 return 1;
2277         return 0;
2278 }
2279 #endif
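
/*
 * Even though the refsort code above is compiled out, the pattern it
 * describes is the stock sort() usage from linux/sort.h: fill an array
 * of {bytenr, slot} pairs, sort by bytenr, then walk the block's
 * pointers in ascending disk order.  Hedged sketch (NR and
 * process_slot() are hypothetical):
 *
 *     struct refsort sorted[NR];
 *     // ... fill sorted[i].bytenr / sorted[i].slot from the block ...
 *     sort(sorted, NR, sizeof(struct refsort), refsort_cmp, NULL);
 *     for (i = 0; i < NR; i++)
 *             process_slot(sorted[i].slot);
 */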
2280
2281 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2282                            struct btrfs_root *root,
2283                            struct extent_buffer *buf,
2284                            int full_backref, int inc)
2285 {
2286         u64 bytenr;
2287         u64 num_bytes;
2288         u64 parent;
2289         u64 ref_root;
2290         u32 nritems;
2291         struct btrfs_key key;
2292         struct btrfs_file_extent_item *fi;
2293         int i;
2294         int level;
2295         int ret = 0;
2296         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
2297                             u64, u64, u64, u64, u64, u64);
2298
2299         ref_root = btrfs_header_owner(buf);
2300         nritems = btrfs_header_nritems(buf);
2301         level = btrfs_header_level(buf);
2302
2303         if (!root->ref_cows && level == 0)
2304                 return 0;
2305
2306         if (inc)
2307                 process_func = btrfs_inc_extent_ref;
2308         else
2309                 process_func = btrfs_free_extent;
2310
2311         if (full_backref)
2312                 parent = buf->start;
2313         else
2314                 parent = 0;
2315
2316         for (i = 0; i < nritems; i++) {
2317                 if (level == 0) {
2318                         btrfs_item_key_to_cpu(buf, &key, i);
2319                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2320                                 continue;
2321                         fi = btrfs_item_ptr(buf, i,
2322                                             struct btrfs_file_extent_item);
2323                         if (btrfs_file_extent_type(buf, fi) ==
2324                             BTRFS_FILE_EXTENT_INLINE)
2325                                 continue;
2326                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2327                         if (bytenr == 0)
2328                                 continue;
2329
2330                         num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2331                         key.offset -= btrfs_file_extent_offset(buf, fi);
2332                         ret = process_func(trans, root, bytenr, num_bytes,
2333                                            parent, ref_root, key.objectid,
2334                                            key.offset);
2335                         if (ret)
2336                                 goto fail;
2337                 } else {
2338                         bytenr = btrfs_node_blockptr(buf, i);
2339                         num_bytes = btrfs_level_size(root, level - 1);
2340                         ret = process_func(trans, root, bytenr, num_bytes,
2341                                            parent, ref_root, level - 1, 0);
2342                         if (ret)
2343                                 goto fail;
2344                 }
2345         }
2346         return 0;
2347 fail:
2348         BUG();
2349         return ret;
2350 }
2351
2352 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2353                   struct extent_buffer *buf, int full_backref)
2354 {
2355         return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2356 }
2357
2358 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2359                   struct extent_buffer *buf, int full_backref)
2360 {
2361         return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2362 }
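
/*
 * Hedged usage sketch: during COW the new copy of a block increments
 * the refs on everything it points to and the old copy drops its refs,
 * so callers pair these as, roughly:
 *
 *     ret = btrfs_inc_ref(trans, root, cow, 1);   // full backref
 *     ...
 *     ret = btrfs_dec_ref(trans, root, buf, 1);
 */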
2363
2364 static int write_one_cache_group(struct btrfs_trans_handle *trans,
2365                                  struct btrfs_root *root,
2366                                  struct btrfs_path *path,
2367                                  struct btrfs_block_group_cache *cache)
2368 {
2369         int ret;
2370         struct btrfs_root *extent_root = root->fs_info->extent_root;
2371         unsigned long bi;
2372         struct extent_buffer *leaf;
2373
2374         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
2375         if (ret < 0)
2376                 goto fail;
2377         BUG_ON(ret);
2378
2379         leaf = path->nodes[0];
2380         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2381         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
2382         btrfs_mark_buffer_dirty(leaf);
2383         btrfs_release_path(extent_root, path);
2384 fail:
2385         if (ret)
2386                 return ret;
2387         return 0;
2389 }
2390
2391 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2392                                    struct btrfs_root *root)
2393 {
2394         struct btrfs_block_group_cache *cache, *entry;
2395         struct rb_node *n;
2396         int err = 0;
2397         int werr = 0;
2398         struct btrfs_path *path;
2399         u64 last = 0;
2400
2401         path = btrfs_alloc_path();
2402         if (!path)
2403                 return -ENOMEM;
2404
2405         while (1) {
2406                 cache = NULL;
2407                 spin_lock(&root->fs_info->block_group_cache_lock);
2408                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
2409                      n; n = rb_next(n)) {
2410                         entry = rb_entry(n, struct btrfs_block_group_cache,
2411                                          cache_node);
2412                         if (entry->dirty) {
2413                                 cache = entry;
2414                                 break;
2415                         }
2416                 }
2417                 spin_unlock(&root->fs_info->block_group_cache_lock);
2418
2419                 if (!cache)
2420                         break;
2421
2422                 cache->dirty = 0;
2423                 last += cache->key.offset;
2424
2425                 err = write_one_cache_group(trans, root,
2426                                             path, cache);
2427                 /*
2428                  * if we fail to write the cache group, remember the
2429                  * error and keep going so the remaining dirty block
2430                  * groups still get a chance to be written out
2431                  */
2432                 if (err) {
2433                         werr = err;
2434                         continue;
2435                 }
2436         }
2437         btrfs_free_path(path);
2438         return werr;
2439 }
2440
2441 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
2442 {
2443         struct btrfs_block_group_cache *block_group;
2444         int readonly = 0;
2445
2446         block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
2447         if (!block_group || block_group->ro)
2448                 readonly = 1;
2449         if (block_group)
2450                 btrfs_put_block_group(block_group);
2451         return readonly;
2452 }
2453
2454 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2455                              u64 total_bytes, u64 bytes_used,
2456                              struct btrfs_space_info **space_info)
2457 {
2458         struct btrfs_space_info *found;
2459
2460         found = __find_space_info(info, flags);
2461         if (found) {
2462                 spin_lock(&found->lock);
2463                 found->total_bytes += total_bytes;
2464                 found->bytes_used += bytes_used;
2465                 found->full = 0;
2466                 spin_unlock(&found->lock);
2467                 *space_info = found;
2468                 return 0;
2469         }
2470         found = kzalloc(sizeof(*found), GFP_NOFS);
2471         if (!found)
2472                 return -ENOMEM;
2473
2474         INIT_LIST_HEAD(&found->block_groups);
2475         init_rwsem(&found->groups_sem);
2476         spin_lock_init(&found->lock);
2477         found->flags = flags;
2478         found->total_bytes = total_bytes;
2479         found->bytes_used = bytes_used;
2480         found->bytes_pinned = 0;
2481         found->bytes_reserved = 0;
2482         found->bytes_readonly = 0;
2483         found->bytes_delalloc = 0;
2484         found->full = 0;
2485         found->force_alloc = 0;
2486         *space_info = found;
2487         list_add_rcu(&found->list, &info->space_info);
2488         return 0;
2489 }
2490
2491 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
2492 {
2493         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
2494                                    BTRFS_BLOCK_GROUP_RAID1 |
2495                                    BTRFS_BLOCK_GROUP_RAID10 |
2496                                    BTRFS_BLOCK_GROUP_DUP);
2497         if (extra_flags) {
2498                 if (flags & BTRFS_BLOCK_GROUP_DATA)
2499                         fs_info->avail_data_alloc_bits |= extra_flags;
2500                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
2501                         fs_info->avail_metadata_alloc_bits |= extra_flags;
2502                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
2503                         fs_info->avail_system_alloc_bits |= extra_flags;
2504         }
2505 }
2506
2507 static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
2508 {
2509         spin_lock(&cache->space_info->lock);
2510         spin_lock(&cache->lock);
2511         if (!cache->ro) {
2512                 cache->space_info->bytes_readonly += cache->key.offset -
2513                                         btrfs_block_group_used(&cache->item);
2514                 cache->ro = 1;
2515         }
2516         spin_unlock(&cache->lock);
2517         spin_unlock(&cache->space_info->lock);
2518 }
2519
2520 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
2521 {
2522         u64 num_devices = root->fs_info->fs_devices->rw_devices;
2523
2524         if (num_devices == 1)
2525                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
2526         if (num_devices < 4)
2527                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
2528
2529         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
2530             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
2531                       BTRFS_BLOCK_GROUP_RAID10))) {
2532                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
2533         }
2534
2535         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
2536             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
2537                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
2538         }
2539
2540         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
2541             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
2542              (flags & BTRFS_BLOCK_GROUP_RAID10) |
2543              (flags & BTRFS_BLOCK_GROUP_DUP)))
2544                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
2545         return flags;
2546 }
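
/*
 * Worked example of the reduction above: on a single-device filesystem
 * a requested profile of DATA | RAID1 | RAID0 loses the multi-device
 * bits in the first test, so
 *
 *     flags = BTRFS_BLOCK_GROUP_DATA |
 *             BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0;
 *     flags = btrfs_reduce_alloc_profile(root, flags);
 *     // with rw_devices == 1: flags == BTRFS_BLOCK_GROUP_DATA
 */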
2547
2548 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
2549 {
2550         struct btrfs_fs_info *info = root->fs_info;
2551         u64 alloc_profile;
2552
2553         if (data) {
2554                 alloc_profile = info->avail_data_alloc_bits &
2555                         info->data_alloc_profile;
2556                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2557         } else if (root == root->fs_info->chunk_root) {
2558                 alloc_profile = info->avail_system_alloc_bits &
2559                         info->system_alloc_profile;
2560                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2561         } else {
2562                 alloc_profile = info->avail_metadata_alloc_bits &
2563                         info->metadata_alloc_profile;
2564                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2565         }
2566
2567         return btrfs_reduce_alloc_profile(root, data);
2568 }
2569
2570 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2571 {
2572         u64 alloc_target;
2573
2574         alloc_target = btrfs_get_alloc_profile(root, 1);
2575         BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
2576                                                        alloc_target);
2577 }
2578
2579 /*
2580  * for now this just makes sure we have at least 5% of our metadata space free
2581  * for use.
2582  */
2583 int btrfs_check_metadata_free_space(struct btrfs_root *root)
2584 {
2585         struct btrfs_fs_info *info = root->fs_info;
2586         struct btrfs_space_info *meta_sinfo;
2587         u64 alloc_target, thresh;
2588         int committed = 0, ret;
2589
2590         /* get the space info for where the metadata will live */
2591         alloc_target = btrfs_get_alloc_profile(root, 0);
2592         meta_sinfo = __find_space_info(info, alloc_target);
2593
2594 again:
2595         spin_lock(&meta_sinfo->lock);
2596         if (!meta_sinfo->full)
2597                 thresh = meta_sinfo->total_bytes * 80;
2598         else
2599                 thresh = meta_sinfo->total_bytes * 95;
2600
2601         do_div(thresh, 100);
2602
2603         if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2604             meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
2605                 struct btrfs_trans_handle *trans;
2606                 if (!meta_sinfo->full) {
2607                         meta_sinfo->force_alloc = 1;
2608                         spin_unlock(&meta_sinfo->lock);
2609
2610                         trans = btrfs_start_transaction(root, 1);
2611                         if (!trans)
2612                                 return -ENOMEM;
2613
2614                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2615                                              2 * 1024 * 1024, alloc_target, 0);
2616                         btrfs_end_transaction(trans, root);
2617                         goto again;
2618                 }
2619                 spin_unlock(&meta_sinfo->lock);
2620
2621                 if (!committed) {
2622                         committed = 1;
2623                         trans = btrfs_join_transaction(root, 1);
2624                         if (!trans)
2625                                 return -ENOMEM;
2626                         ret = btrfs_commit_transaction(trans, root);
2627                         if (ret)
2628                                 return ret;
2629                         goto again;
2630                 }
2631                 return -ENOSPC;
2632         }
2633         spin_unlock(&meta_sinfo->lock);
2634
2635         return 0;
2636 }
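
/*
 * Worked example of the threshold math above (sizes hypothetical): with
 * 10 GiB of metadata space and the space_info not yet full,
 *
 *     thresh = 10737418240 * 80;      // total_bytes * 80
 *     do_div(thresh, 100);            // -> 8589934592, i.e. 8 GiB
 *
 * so once used + reserved + pinned + readonly crosses 8 GiB we try to
 * allocate a new chunk, and only after the space fills up do we fall
 * back to the stricter 95% limit and, eventually, -ENOSPC.
 */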
2637
2638 /*
2639  * This will check the space that the inode allocates from to make sure we have
2640  * enough space for bytes.
2641  */
2642 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
2643                                 u64 bytes)
2644 {
2645         struct btrfs_space_info *data_sinfo;
2646         int ret = 0, committed = 0;
2647
2648         /* make sure bytes are sectorsize aligned */
2649         bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2650
2651         data_sinfo = BTRFS_I(inode)->space_info;
2652 again:
2653         /* make sure we have enough space to handle the data first */
2654         spin_lock(&data_sinfo->lock);
2655         if (data_sinfo->total_bytes - data_sinfo->bytes_used -
2656             data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
2657             data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
2658             data_sinfo->bytes_may_use < bytes) {
2659                 struct btrfs_trans_handle *trans;
2660
2661                 /*
2662                  * if we don't have enough free bytes in this space then we need
2663                  * to alloc a new chunk.
2664                  */
2665                 if (!data_sinfo->full) {
2666                         u64 alloc_target;
2667
2668                         data_sinfo->force_alloc = 1;
2669                         spin_unlock(&data_sinfo->lock);
2670
2671                         alloc_target = btrfs_get_alloc_profile(root, 1);
2672                         trans = btrfs_start_transaction(root, 1);
2673                         if (!trans)
2674                                 return -ENOMEM;
2675
2676                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2677                                              bytes + 2 * 1024 * 1024,
2678                                              alloc_target, 0);
2679                         btrfs_end_transaction(trans, root);
2680                         if (ret)
2681                                 return ret;
2682                         goto again;
2683                 }
2684                 spin_unlock(&data_sinfo->lock);
2685
2686                 /* commit the current transaction and try again */
2687                 if (!committed) {
2688                         committed = 1;
2689                         trans = btrfs_join_transaction(root, 1);
2690                         if (!trans)
2691                                 return -ENOMEM;
2692                         ret = btrfs_commit_transaction(trans, root);
2693                         if (ret)
2694                                 return ret;
2695                         goto again;
2696                 }
2697
2698                 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
2699                        ", %llu bytes_used, %llu bytes_reserved, "
2700                        "%llu bytes_pinned, %llu bytes_readonly, %llu may use, "
2701                        "%llu total\n", (unsigned long long)bytes,
2702                        (unsigned long long)data_sinfo->bytes_delalloc,
2703                        (unsigned long long)data_sinfo->bytes_used,
2704                        (unsigned long long)data_sinfo->bytes_reserved,
2705                        (unsigned long long)data_sinfo->bytes_pinned,
2706                        (unsigned long long)data_sinfo->bytes_readonly,
2707                        (unsigned long long)data_sinfo->bytes_may_use,
2708                        (unsigned long long)data_sinfo->total_bytes);
2709                 return -ENOSPC;
2710         }
2711         data_sinfo->bytes_may_use += bytes;
2712         BTRFS_I(inode)->reserved_bytes += bytes;
2713         spin_unlock(&data_sinfo->lock);
2714
2715         return btrfs_check_metadata_free_space(root);
2716 }
2717
2718 /*
2719  * if there was an error for whatever reason after calling
2720  * btrfs_check_data_free_space, call this so we can cleanup the counters.
2721  */
2722 void btrfs_free_reserved_data_space(struct btrfs_root *root,
2723                                     struct inode *inode, u64 bytes)
2724 {
2725         struct btrfs_space_info *data_sinfo;
2726
2727         /* make sure bytes are sectorsize aligned */
2728         bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2729
2730         data_sinfo = BTRFS_I(inode)->space_info;
2731         spin_lock(&data_sinfo->lock);
2732         data_sinfo->bytes_may_use -= bytes;
2733         BTRFS_I(inode)->reserved_bytes -= bytes;
2734         spin_unlock(&data_sinfo->lock);
2735 }
2736
2737 /* called when we are adding a delalloc extent to the inode's io_tree */
2738 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
2739                                   u64 bytes)
2740 {
2741         struct btrfs_space_info *data_sinfo;
2742
2743         /* get the space info for where this inode will be storing its data */
2744         data_sinfo = BTRFS_I(inode)->space_info;
2745
2746         /* make sure we have enough space to handle the data first */
2747         spin_lock(&data_sinfo->lock);
2748         data_sinfo->bytes_delalloc += bytes;
2749
2750         /*
2751          * we are adding a delalloc extent without calling
2752          * btrfs_check_data_free_space first.  This happens on a weird
2753          * writepage condition, but shouldn't hurt our accounting
2754          */
2755         if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
2756                 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
2757                 BTRFS_I(inode)->reserved_bytes = 0;
2758         } else {
2759                 data_sinfo->bytes_may_use -= bytes;
2760                 BTRFS_I(inode)->reserved_bytes -= bytes;
2761         }
2762
2763         spin_unlock(&data_sinfo->lock);
2764 }
2765
2766 /* called when we are clearing a delalloc extent from the inode's io_tree */
2767 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
2768                               u64 bytes)
2769 {
2770         struct btrfs_space_info *info;
2771
2772         info = BTRFS_I(inode)->space_info;
2773
2774         spin_lock(&info->lock);
2775         info->bytes_delalloc -= bytes;
2776         spin_unlock(&info->lock);
2777 }
2778
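     /*
      * mark every metadata space info with force_alloc so the next
      * metadata do_chunk_alloc is forced to allocate a fresh chunk
      */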
2779 static void force_metadata_allocation(struct btrfs_fs_info *info)
2780 {
2781         struct list_head *head = &info->space_info;
2782         struct btrfs_space_info *found;
2783
2784         rcu_read_lock();
2785         list_for_each_entry_rcu(found, head, list) {
2786                 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
2787                         found->force_alloc = 1;
2788         }
2789         rcu_read_unlock();
2790 }
2791
2792 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
2793                           struct btrfs_root *extent_root, u64 alloc_bytes,
2794                           u64 flags, int force)
2795 {
2796         struct btrfs_space_info *space_info;
2797         struct btrfs_fs_info *fs_info = extent_root->fs_info;
2798         u64 thresh;
2799         int ret = 0;
2800
2801         mutex_lock(&fs_info->chunk_mutex);
2802
2803         flags = btrfs_reduce_alloc_profile(extent_root, flags);
2804
2805         space_info = __find_space_info(extent_root->fs_info, flags);
2806         if (!space_info) {
2807                 ret = update_space_info(extent_root->fs_info, flags,
2808                                         0, 0, &space_info);
2809                 BUG_ON(ret);
2810         }
2811         BUG_ON(!space_info);
2812
2813         spin_lock(&space_info->lock);
2814         if (space_info->force_alloc) {
2815                 force = 1;
2816                 space_info->force_alloc = 0;
2817         }
2818         if (space_info->full) {
2819                 spin_unlock(&space_info->lock);
2820                 goto out;
2821         }
2822
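             /*
              * don't bother allocating unless the projected usage crosses
              * 60% of the writable bytes in this space info
              * (div_factor(x, 6) is x * 6 / 10)
              */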
2823         thresh = space_info->total_bytes - space_info->bytes_readonly;
2824         thresh = div_factor(thresh, 6);
2825         if (!force &&
2826            (space_info->bytes_used + space_info->bytes_pinned +
2827             space_info->bytes_reserved + alloc_bytes) < thresh) {
2828                 spin_unlock(&space_info->lock);
2829                 goto out;
2830         }
2831         spin_unlock(&space_info->lock);
2832
2833         /*
2834          * if we're doing a data chunk, go ahead and make sure that
2835          * we keep a reasonable number of metadata chunks allocated in the
2836          * FS as well.
2837          */
2838         if (flags & BTRFS_BLOCK_GROUP_DATA) {
2839                 fs_info->data_chunk_allocations++;
2840                 if (!(fs_info->data_chunk_allocations %
2841                       fs_info->metadata_ratio))
2842                         force_metadata_allocation(fs_info);
2843         }
2844
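             /*
              * a failed chunk allocation marks this space info full so we
              * don't retry a new chunk on every subsequent allocation
              */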
2845         ret = btrfs_alloc_chunk(trans, extent_root, flags);
2846         if (ret)
2847                 space_info->full = 1;
2848 out:
2849         mutex_unlock(&extent_root->fs_info->chunk_mutex);
2850         return ret;
2851 }
2852
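     /*
      * update the used byte counts in the super block, the root item and
      * every block group that intersects [bytenr, bytenr + num_bytes).
      * when freeing with mark_free set, the range is discarded and handed
      * back to the free space cache.
      */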
2853 static int update_block_group(struct btrfs_trans_handle *trans,
2854                               struct btrfs_root *root,
2855                               u64 bytenr, u64 num_bytes, int alloc,
2856                               int mark_free)
2857 {
2858         struct btrfs_block_group_cache *cache;
2859         struct btrfs_fs_info *info = root->fs_info;
2860         u64 total = num_bytes;
2861         u64 old_val;
2862         u64 byte_in_group;
2863
2864         /* block accounting for super block */
2865         spin_lock(&info->delalloc_lock);
2866         old_val = btrfs_super_bytes_used(&info->super_copy);
2867         if (alloc)
2868                 old_val += num_bytes;
2869         else
2870                 old_val -= num_bytes;
2871         btrfs_set_super_bytes_used(&info->super_copy, old_val);
2872
2873         /* block accounting for root item */
2874         old_val = btrfs_root_used(&root->root_item);
2875         if (alloc)
2876                 old_val += num_bytes;
2877         else
2878                 old_val -= num_bytes;
2879         btrfs_set_root_used(&root->root_item, old_val);
2880         spin_unlock(&info->delalloc_lock);
2881
2882         while (total) {
2883                 cache = btrfs_lookup_block_group(info, bytenr);
2884                 if (!cache)
2885                         return -ENOENT;
2886                 byte_in_group = bytenr - cache->key.objectid;
2887                 WARN_ON(byte_in_group > cache->key.offset);
2888
2889                 spin_lock(&cache->space_info->lock);
2890                 spin_lock(&cache->lock);
2891                 cache->dirty = 1;
2892                 old_val = btrfs_block_group_used(&cache->item);
2893                 num_bytes = min(total, cache->key.offset - byte_in_group);
2894                 if (alloc) {
2895                         old_val += num_bytes;
2896                         cache->space_info->bytes_used += num_bytes;
2897                         if (cache->ro)
2898                                 cache->space_info->bytes_readonly -= num_bytes;
2899                         btrfs_set_block_group_used(&cache->item, old_val);
2900                         spin_unlock(&cache->lock);
2901                         spin_unlock(&cache->space_info->lock);
2902                 } else {
2903                         old_val -= num_bytes;
2904                         cache->space_info->bytes_used -= num_bytes;
2905                         if (cache->ro)
2906                                 cache->space_info->bytes_readonly += num_bytes;
2907                         btrfs_set_block_group_used(&cache->item, old_val);
2908                         spin_unlock(&cache->lock);
2909                         spin_unlock(&cache->space_info->lock);
2910                         if (mark_free) {
2911                                 int ret;
2912
2913                                 ret = btrfs_discard_extent(root, bytenr,
2914                                                            num_bytes);
2915                                 WARN_ON(ret);
2916
2917                                 ret = btrfs_add_free_space(cache, bytenr,
2918                                                            num_bytes);
2919                                 WARN_ON(ret);
2920                         }
2921                 }
2922                 btrfs_put_block_group(cache);
2923                 total -= num_bytes;
2924                 bytenr += num_bytes;
2925         }
2926         return 0;
2927 }
2928
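     /*
      * return the logical start of the first block group at or after
      * search_start, or 0 when there is none
      */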
2929 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
2930 {
2931         struct btrfs_block_group_cache *cache;
2932         u64 bytenr;
2933
2934         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
2935         if (!cache)
2936                 return 0;
2937
2938         bytenr = cache->key.objectid;
2939         btrfs_put_block_group(cache);
2940
2941         return bytenr;
2942 }
2943
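     /*
      * pin or unpin [bytenr, bytenr + num), keeping the per block group
      * and per space info pinned counters in step.  unpinned space goes
      * back into the free space cache once the group has been cached.
      */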
2944 int btrfs_update_pinned_extents(struct btrfs_root *root,
2945                                 u64 bytenr, u64 num, int pin)
2946 {
2947         u64 len;
2948         struct btrfs_block_group_cache *cache;
2949         struct btrfs_fs_info *fs_info = root->fs_info;
2950
2951         if (pin) {
2952                 set_extent_dirty(&fs_info->pinned_extents,
2953                                 bytenr, bytenr + num - 1, GFP_NOFS);
2954         } else {
2955                 clear_extent_dirty(&fs_info->pinned_extents,
2956                                 bytenr, bytenr + num - 1, GFP_NOFS);
2957         }
2958
2959         while (num > 0) {
2960                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2961                 BUG_ON(!cache);
2962                 len = min(num, cache->key.offset -
2963                           (bytenr - cache->key.objectid));
2964                 if (pin) {
2965                         spin_lock(&cache->space_info->lock);
2966                         spin_lock(&cache->lock);
2967                         cache->pinned += len;
2968                         cache->space_info->bytes_pinned += len;
2969                         spin_unlock(&cache->lock);
2970                         spin_unlock(&cache->space_info->lock);
2971                         fs_info->total_pinned += len;
2972                 } else {
2973                         spin_lock(&cache->space_info->lock);
2974                         spin_lock(&cache->lock);
2975                         cache->pinned -= len;
2976                         cache->space_info->bytes_pinned -= len;
2977                         spin_unlock(&cache->lock);
2978                         spin_unlock(&cache->space_info->lock);
2979                         fs_info->total_pinned -= len;
2980                         if (cache->cached)
2981                                 btrfs_add_free_space(cache, bytenr, len);
2982                 }
2983                 btrfs_put_block_group(cache);
2984                 bytenr += len;
2985                 num -= len;
2986         }
2987         return 0;
2988 }
2989
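     /*
      * adjust the reserved byte counters of every block group that
      * intersects [bytenr, bytenr + num)
      */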
2990 static int update_reserved_extents(struct btrfs_root *root,
2991                                    u64 bytenr, u64 num, int reserve)
2992 {
2993         u64 len;
2994         struct btrfs_block_group_cache *cache;
2995         struct btrfs_fs_info *fs_info = root->fs_info;
2996
2997         while (num > 0) {
2998                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2999                 BUG_ON(!cache);
3000                 len = min(num, cache->key.offset -
3001                           (bytenr - cache->key.objectid));
3002
3003                 spin_lock(&cache->space_info->lock);
3004                 spin_lock(&cache->lock);
3005                 if (reserve) {
3006                         cache->reserved += len;
3007                         cache->space_info->bytes_reserved += len;
3008                 } else {
3009                         cache->reserved -= len;
3010                         cache->space_info->bytes_reserved -= len;
3011                 }
3012                 spin_unlock(&cache->lock);
3013                 spin_unlock(&cache->space_info->lock);
3014                 btrfs_put_block_group(cache);
3015                 bytenr += len;
3016                 num -= len;
3017         }
3018         return 0;
3019 }
3020
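     /*
      * mirror every range that is dirty in the pinned extent tree into
      * @copy, giving the caller a private snapshot to walk
      */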
3021 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
3022 {
3023         u64 last = 0;
3024         u64 start;
3025         u64 end;
3026         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
3027         int ret;
3028
3029         while (1) {
3030                 ret = find_first_extent_bit(pinned_extents, last,
3031                                             &start, &end, EXTENT_DIRTY);
3032                 if (ret)
3033                         break;
3034                 set_extent_dirty(copy, start, end, GFP_NOFS);
3035                 last = end + 1;
3036         }
3037         return 0;
3038 }
3039
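     /*
      * called during transaction commit with the snapshot built by
      * btrfs_copy_pinned; each range is discarded, unpinned and returned
      * to the free space accounting
      */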
3040 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3041                                struct btrfs_root *root,
3042                                struct extent_io_tree *unpin)
3043 {
3044         u64 start;
3045         u64 end;
3046         int ret;
3047
3048         while (1) {
3049                 ret = find_first_extent_bit(unpin, 0, &start, &end,
3050                                             EXTENT_DIRTY);
3051                 if (ret)
3052                         break;
3053
3054                 btrfs_discard_extent(root, start, end + 1 - start);
3055
3056                 /* unlocks the pinned mutex */
3057                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
3058                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
3059
3060                 cond_resched();
3061         }
3062         return 0;
3063 }
3064
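     /*
      * pin down an extent that is going away.  returns 1 with *must_clean
      * set when the extent is an unwritten tree block from the current
      * transaction, which the caller can clean and reuse instead of
      * pinning.
      */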
3065 static int pin_down_bytes(struct btrfs_trans_handle *trans,
3066                           struct btrfs_root *root,
3067                           struct btrfs_path *path,
3068                           u64 bytenr, u64 num_bytes, int is_data,
3069                           struct extent_buffer **must_clean)
3070 {
3072         struct extent_buffer *buf;
3073
3074         if (is_data)
3075                 goto pinit;
3076
3077         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
3078         if (!buf)
3079                 goto pinit;
3080
3081         /* we can reuse a block if it hasn't been written
3082          * and it is from this transaction.  We can't
3083          * reuse anything from the tree log root because
3084          * it has tiny sub-transactions.
3085          */
3086         if (btrfs_buffer_uptodate(buf, 0) &&
3087             btrfs_try_tree_lock(buf)) {
3088                 u64 header_owner = btrfs_header_owner(buf);
3089                 u64 header_transid = btrfs_header_generation(buf);
3090                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
3091                     header_transid == trans->transid &&
3092                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3093                         *must_clean = buf;
3094                         return 1;
3095                 }
3096                 btrfs_tree_unlock(buf);
3097         }
3098         free_extent_buffer(buf);
3099 pinit:
3100         btrfs_set_path_blocking(path);
3101         /* unlocks the pinned mutex */
3102         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
3103
3105         return 0;
3106 }
3107
3108
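     /*
      * drop refs_to_drop references to the extent at bytenr.  once the
      * last reference is gone the extent item itself is deleted and the
      * space is pinned until the transaction commits.
      */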
3109 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3110                                 struct btrfs_root *root,
3111                                 u64 bytenr, u64 num_bytes, u64 parent,
3112                                 u64 root_objectid, u64 owner_objectid,
3113                                 u64 owner_offset, int refs_to_drop,
3114                                 struct btrfs_delayed_extent_op *extent_op)
3115 {
3116         struct btrfs_key key;
3117         struct btrfs_path *path;
3118         struct btrfs_fs_info *info = root->fs_info;
3119         struct btrfs_root *extent_root = info->extent_root;
3120         struct extent_buffer *leaf;
3121         struct btrfs_extent_item *ei;
3122         struct btrfs_extent_inline_ref *iref;
3123         int ret;
3124         int is_data;
3125         int extent_slot = 0;
3126         int found_extent = 0;
3127         int num_to_del = 1;
3128         u32 item_size;
3129         u64 refs;
3130
3131         path = btrfs_alloc_path();
3132         if (!path)
3133                 return -ENOMEM;
3134
3135         path->reada = 1;
3136         path->leave_spinning = 1;
3137
3138         is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
3139         BUG_ON(!is_data && refs_to_drop != 1);
3140
3141         ret = lookup_extent_backref(trans, extent_root, path, &iref,
3142                                     bytenr, num_bytes, parent,
3143                                     root_objectid, owner_objectid,
3144                                     owner_offset);
3145         if (ret == 0) {
3146                 extent_slot = path->slots[0];
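                     /*
                      * the backref can sit a few slots past the extent
                      * item in the same leaf, so scan backwards a bounded
                      * number of slots looking for the EXTENT_ITEM key
                      */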
3147                 while (extent_slot >= 0) {
3148                         btrfs_item_key_to_cpu(path->nodes[0], &key,
3149                                               extent_slot);
3150                         if (key.objectid != bytenr)
3151                                 break;
3152                         if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3153                             key.offset == num_bytes) {
3154                                 found_extent = 1;
3155                                 break;
3156                         }
3157                         if (path->slots[0] - extent_slot > 5)
3158                                 break;
3159                         extent_slot--;
3160                 }
3161 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3162                 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
3163                 if (found_extent && item_size < sizeof(*ei))
3164                         found_extent = 0;