Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Jul 2011 23:43:52 +0000 (16:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Jul 2011 23:43:52 +0000 (16:43 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: make sure reserve_metadata_bytes doesn't leak out strange errors
  Btrfs: use the commit_root for reading free_space_inode crcs
  Btrfs: reduce extent_state lock contention for metadata
  Btrfs: remove lockdep magic from btrfs_next_leaf
  Btrfs: make a lockdep class for each root
  Btrfs: switch the btrfs tree locks to reader/writer
  Btrfs: fix deadlock when throttling transactions
  Btrfs: stop using highmem for extent_buffers
  Btrfs: fix BUG_ON() caused by ENOSPC when relocating space
  Btrfs: tag pages for writeback in sync
  Btrfs: fix enospc problems with delalloc
  Btrfs: don't flush delalloc arbitrarily
  Btrfs: use find_or_create_page instead of grab_cache_page
  Btrfs: use a worker thread to do caching
  Btrfs: fix how we merge extent states and deal with cached states
  Btrfs: use the normal checksumming infrastructure for free space cache
  Btrfs: serialize flushers in reserve_metadata_bytes
  Btrfs: do transaction space reservation before joining the transaction
  Btrfs: try to only do one btrfs_search_slot in do_setxattr

1  2 
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c

diff --combined fs/btrfs/ctree.h
index fe9287b064969d503e0e2c44f8c7446a8767ce9b,3be57c611040ffd279bae8377f673a5b467f23e9..365c4e1dde04968379b86568dbb129bd6f3f0dbc
@@@ -755,6 -755,8 +755,8 @@@ struct btrfs_space_info 
                                   chunks for this space */
        unsigned int chunk_alloc:1;     /* set if we are allocating a chunk */
  
+       unsigned int flush:1;           /* set if we are trying to make space */
        unsigned int force_alloc;       /* set if we need to force a chunk
                                           alloc for this space */
  
        struct list_head block_groups[BTRFS_NR_RAID_TYPES];
        spinlock_t lock;
        struct rw_semaphore groups_sem;
-       atomic_t caching_threads;
+       wait_queue_head_t wait;
  };
  
  struct btrfs_block_rsv {
@@@ -824,6 -826,7 +826,7 @@@ struct btrfs_caching_control 
        struct list_head list;
        struct mutex mutex;
        wait_queue_head_t wait;
+       struct btrfs_work work;
        struct btrfs_block_group_cache *block_group;
        u64 progress;
        atomic_t count;
@@@ -1032,6 -1035,8 +1035,8 @@@ struct btrfs_fs_info 
        struct btrfs_workers endio_write_workers;
        struct btrfs_workers endio_freespace_worker;
        struct btrfs_workers submit_workers;
+       struct btrfs_workers caching_workers;
        /*
         * fixup workers take dirty pages that didn't properly go through
         * the cow mechanism and make them safe to write.  It happens
@@@ -1219,7 -1224,7 +1224,7 @@@ struct btrfs_root 
         * right now this just gets used so that a root has its own devid
         * for stat.  It may be used for more later
         */
 -      struct super_block anon_super;
 +      dev_t anon_dev;
  };
  
  struct btrfs_ioctl_defrag_range_args {
@@@ -2128,7 -2133,7 +2133,7 @@@ static inline bool btrfs_mixed_space_in
  
  /* extent-tree.c */
  static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
-                                                int num_items)
+                                                unsigned num_items)
  {
        return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
                3 * num_items;
@@@ -2222,9 -2227,6 +2227,6 @@@ void btrfs_set_inode_space_info(struct 
  void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
  int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
  void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
- int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root,
-                               int num_items);
  void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
  int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@@ -2330,7 -2332,7 +2332,7 @@@ struct btrfs_path *btrfs_alloc_path(voi
  void btrfs_free_path(struct btrfs_path *p);
  void btrfs_set_path_blocking(struct btrfs_path *p);
  void btrfs_clear_path_blocking(struct btrfs_path *p,
-                              struct extent_buffer *held);
+                              struct extent_buffer *held, int held_rw);
  void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
  
  int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@@ -2510,9 -2512,6 +2512,9 @@@ int btrfs_csum_truncate(struct btrfs_tr
  int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
                             struct list_head *list, int search_commit);
  /* inode.c */
 +struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
 +                                         size_t pg_offset, u64 start, u64 len,
 +                                         int create);
  
  /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
  #if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
@@@ -2605,7 -2604,7 +2607,7 @@@ int btrfs_defrag_file(struct inode *ino
  int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
                           struct inode *inode);
  int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
 -int btrfs_sync_file(struct file *file, int datasync);
 +int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
  int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                            int skip_pinned);
  extern const struct file_operations btrfs_file_operations;
@@@ -2645,9 -2644,9 +2647,9 @@@ do {                                                            
  
  /* acl.c */
  #ifdef CONFIG_BTRFS_FS_POSIX_ACL
 -int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
 +struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
  #else
 -#define btrfs_check_acl NULL
 +#define btrfs_get_acl NULL
  #endif
  int btrfs_init_acl(struct btrfs_trans_handle *trans,
                   struct inode *inode, struct inode *dir);
diff --combined fs/btrfs/disk-io.c
index b231ae13b2697ebcd5b88d848a81ddc83e01c990,94ecac33cf2d924ab55cff813b774c04acd285a9..07b3ac662e193e6b3a19180c3c3c9bfccbeb8028
@@@ -100,38 -100,83 +100,83 @@@ struct async_submit_bio 
        struct btrfs_work work;
  };
  
- /* These are used to set the lockdep class on the extent buffer locks.
-  * The class is set by the readpage_end_io_hook after the buffer has
-  * passed csum validation but before the pages are unlocked.
+ /*
+  * Lockdep class keys for extent_buffer->lock's in this root.  For a given
+  * eb, the lockdep key is determined by the btrfs_root it belongs to and
+  * the level the eb occupies in the tree.
+  *
+  * Different roots are used for different purposes and may nest inside each
+  * other and they require separate keysets.  As lockdep keys should be
+  * static, assign keysets according to the purpose of the root as indicated
+  * by btrfs_root->objectid.  This ensures that all special purpose roots
+  * have separate keysets.
   *
-  * The lockdep class is also set by btrfs_init_new_buffer on freshly
-  * allocated blocks.
+  * Lock-nesting across peer nodes is always done with the immediate parent
+  * node locked thus preventing deadlock.  As lockdep doesn't know this, use
+  * subclass to avoid triggering lockdep warning in such cases.
   *
-  * The class is based on the level in the tree block, which allows lockdep
-  * to know that lower nodes nest inside the locks of higher nodes.
+  * The key is set by the readpage_end_io_hook after the buffer has passed
+  * csum validation but before the pages are unlocked.  It is also set by
+  * btrfs_init_new_buffer on freshly allocated blocks.
   *
-  * We also add a check to make sure the highest level of the tree is
-  * the same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this
-  * code needs update as well.
+  * We also add a check to make sure the highest level of the tree is the
+  * same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this code
+  * needs update as well.
   */
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  # if BTRFS_MAX_LEVEL != 8
  #  error
  # endif
- static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
- static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
-       /* leaf */
-       "btrfs-extent-00",
-       "btrfs-extent-01",
-       "btrfs-extent-02",
-       "btrfs-extent-03",
-       "btrfs-extent-04",
-       "btrfs-extent-05",
-       "btrfs-extent-06",
-       "btrfs-extent-07",
-       /* highest possible level */
-       "btrfs-extent-08",
+ static struct btrfs_lockdep_keyset {
+       u64                     id;             /* root objectid */
+       const char              *name_stem;     /* lock name stem */
+       char                    names[BTRFS_MAX_LEVEL + 1][20];
+       struct lock_class_key   keys[BTRFS_MAX_LEVEL + 1];
+ } btrfs_lockdep_keysets[] = {
+       { .id = BTRFS_ROOT_TREE_OBJECTID,       .name_stem = "root"     },
+       { .id = BTRFS_EXTENT_TREE_OBJECTID,     .name_stem = "extent"   },
+       { .id = BTRFS_CHUNK_TREE_OBJECTID,      .name_stem = "chunk"    },
+       { .id = BTRFS_DEV_TREE_OBJECTID,        .name_stem = "dev"      },
+       { .id = BTRFS_FS_TREE_OBJECTID,         .name_stem = "fs"       },
+       { .id = BTRFS_CSUM_TREE_OBJECTID,       .name_stem = "csum"     },
+       { .id = BTRFS_ORPHAN_OBJECTID,          .name_stem = "orphan"   },
+       { .id = BTRFS_TREE_LOG_OBJECTID,        .name_stem = "log"      },
+       { .id = BTRFS_TREE_RELOC_OBJECTID,      .name_stem = "treloc"   },
+       { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc"   },
+       { .id = 0,                              .name_stem = "tree"     },
  };
+ void __init btrfs_init_lockdep(void)
+ {
+       int i, j;
+       /* initialize lockdep class names */
+       for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
+               struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
+               for (j = 0; j < ARRAY_SIZE(ks->names); j++)
+                       snprintf(ks->names[j], sizeof(ks->names[j]),
+                                "btrfs-%s-%02d", ks->name_stem, j);
+       }
+ }
+ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
+                                   int level)
+ {
+       struct btrfs_lockdep_keyset *ks;
+       BUG_ON(level >= ARRAY_SIZE(ks->keys));
+       /* find the matching keyset, id 0 is the default entry */
+       for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+               if (ks->id == objectid)
+                       break;
+       lockdep_set_class_and_name(&eb->lock,
+                                  &ks->keys[level], ks->names[level]);
+ }
  #endif
  
  /*
@@@ -217,7 -262,6 +262,6 @@@ static int csum_tree_block(struct btrfs
        unsigned long len;
        unsigned long cur_len;
        unsigned long offset = BTRFS_CSUM_SIZE;
-       char *map_token = NULL;
        char *kaddr;
        unsigned long map_start;
        unsigned long map_len;
        len = buf->len - offset;
        while (len > 0) {
                err = map_private_extent_buffer(buf, offset, 32,
-                                       &map_token, &kaddr,
-                                       &map_start, &map_len, KM_USER0);
+                                       &kaddr, &map_start, &map_len);
                if (err)
                        return 1;
                cur_len = min(len, map_len - (offset - map_start));
                                      crc, cur_len);
                len -= cur_len;
                offset += cur_len;
-               unmap_extent_buffer(buf, map_token, KM_USER0);
        }
        if (csum_size > sizeof(inline_result)) {
                result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
@@@ -494,15 -536,6 +536,6 @@@ static noinline int check_leaf(struct b
        return 0;
  }
  
- #ifdef CONFIG_DEBUG_LOCK_ALLOC
- void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
- {
-       lockdep_set_class_and_name(&eb->lock,
-                          &btrfs_eb_class[level],
-                          btrfs_eb_name[level]);
- }
- #endif
  static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                               struct extent_state *state)
  {
        }
        found_level = btrfs_header_level(eb);
  
-       btrfs_set_buffer_lockdep_class(eb, found_level);
+       btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
+                                      eb, found_level);
  
        ret = csum_tree_block(root, eb, 1);
        if (ret) {
@@@ -1077,7 -1111,12 +1111,7 @@@ static int __setup_root(u32 nodesize, u
        init_completion(&root->kobj_unregister);
        root->defrag_running = 0;
        root->root_key.objectid = objectid;
 -      root->anon_super.s_root = NULL;
 -      root->anon_super.s_dev = 0;
 -      INIT_LIST_HEAD(&root->anon_super.s_list);
 -      INIT_LIST_HEAD(&root->anon_super.s_instances);
 -      init_rwsem(&root->anon_super.s_umount);
 -
 +      root->anon_dev = 0;
        return 0;
  }
  
@@@ -1306,7 -1345,7 +1340,7 @@@ again
        spin_lock_init(&root->cache_lock);
        init_waitqueue_head(&root->cache_wait);
  
 -      ret = set_anon_super(&root->anon_super, NULL);
 +      ret = get_anon_bdev(&root->anon_dev);
        if (ret)
                goto fail;
  
@@@ -1598,7 -1637,7 +1632,7 @@@ struct btrfs_root *open_ctree(struct su
                goto fail_bdi;
        }
  
-       fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+       mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
  
        INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
                           fs_info->thread_pool_size),
                           &fs_info->generic_worker);
  
+       btrfs_init_workers(&fs_info->caching_workers, "cache",
+                          2, &fs_info->generic_worker);
        /* a higher idle thresh on the submit workers makes it much more
         * likely that bios will be send down in a sane order to the
         * devices
        btrfs_start_workers(&fs_info->endio_write_workers, 1);
        btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
        btrfs_start_workers(&fs_info->delayed_workers, 1);
+       btrfs_start_workers(&fs_info->caching_workers, 1);
  
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@@ -2112,6 -2155,7 +2150,7 @@@ fail_sb_buffer
        btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
        btrfs_stop_workers(&fs_info->delayed_workers);
+       btrfs_stop_workers(&fs_info->caching_workers);
  fail_alloc:
        kfree(fs_info->delayed_root);
  fail_iput:
@@@ -2388,8 -2432,10 +2427,8 @@@ static void free_fs_root(struct btrfs_r
  {
        iput(root->cache_inode);
        WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
 -      if (root->anon_super.s_dev) {
 -              down_write(&root->anon_super.s_umount);
 -              kill_anon_super(&root->anon_super);
 -      }
 +      if (root->anon_dev)
 +              free_anon_bdev(root->anon_dev);
        free_extent_buffer(root->node);
        free_extent_buffer(root->commit_root);
        kfree(root->free_ino_ctl);
@@@ -2577,6 -2623,7 +2616,7 @@@ int close_ctree(struct btrfs_root *root
        btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
        btrfs_stop_workers(&fs_info->delayed_workers);
+       btrfs_stop_workers(&fs_info->caching_workers);
  
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
diff --combined fs/btrfs/extent_io.c
index 561262d35689f463875e6bf4214f12ff5725b775,5bbdb243bb6f7b95b8abd808806970e5d4ec70eb..067b1747421bfd655b2d0ddbd228f195cca65553
@@@ -281,11 -281,10 +281,10 @@@ static int merge_state(struct extent_io
                if (other->start == state->end + 1 &&
                    other->state == state->state) {
                        merge_cb(tree, state, other);
-                       other->start = state->start;
-                       state->tree = NULL;
-                       rb_erase(&state->rb_node, &tree->state);
-                       free_extent_state(state);
-                       state = NULL;
+                       state->end = other->end;
+                       other->tree = NULL;
+                       rb_erase(&other->rb_node, &tree->state);
+                       free_extent_state(other);
                }
        }
  
@@@ -351,7 -350,6 +350,6 @@@ static int insert_state(struct extent_i
                       "%llu %llu\n", (unsigned long long)found->start,
                       (unsigned long long)found->end,
                       (unsigned long long)start, (unsigned long long)end);
-               free_extent_state(state);
                return -EEXIST;
        }
        state->tree = tree;
@@@ -500,7 -498,8 +498,8 @@@ again
                        cached_state = NULL;
                }
  
-               if (cached && cached->tree && cached->start == start) {
+               if (cached && cached->tree && cached->start <= start &&
+                   cached->end > start) {
                        if (clear)
                                atomic_dec(&cached->refs);
                        state = cached;
@@@ -742,7 -741,8 +741,8 @@@ again
        spin_lock(&tree->lock);
        if (cached_state && *cached_state) {
                state = *cached_state;
-               if (state->start == start && state->tree) {
+               if (state->start <= start && state->end > start &&
+                   state->tree) {
                        node = &state->rb_node;
                        goto hit_next;
                }
@@@ -783,13 -783,13 +783,13 @@@ hit_next
                if (err)
                        goto out;
  
-               next_node = rb_next(node);
                cache_state(state, cached_state);
                merge_state(tree, state);
                if (last_end == (u64)-1)
                        goto out;
  
                start = last_end + 1;
+               next_node = rb_next(&state->rb_node);
                if (next_node && start < end && prealloc && !need_resched()) {
                        state = rb_entry(next_node, struct extent_state,
                                         rb_node);
                 * Avoid to free 'prealloc' if it can be merged with
                 * the later extent.
                 */
-               atomic_inc(&prealloc->refs);
                err = insert_state(tree, prealloc, start, this_end,
                                   &bits);
                BUG_ON(err == -EEXIST);
                        goto out;
                }
                cache_state(prealloc, cached_state);
-               free_extent_state(prealloc);
                prealloc = NULL;
                start = this_end + 1;
                goto search_again;
@@@ -1564,7 -1562,8 +1562,8 @@@ int test_range_bit(struct extent_io_tre
        int bitset = 0;
  
        spin_lock(&tree->lock);
-       if (cached && cached->tree && cached->start == start)
+       if (cached && cached->tree && cached->start <= start &&
+           cached->end > start)
                node = &cached->rb_node;
        else
                node = tree_search(tree, start);
@@@ -2432,6 -2431,7 +2431,7 @@@ static int extent_write_cache_pages(str
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        int scanned = 0;
+       int tag;
  
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                scanned = 1;
        }
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag = PAGECACHE_TAG_TOWRITE;
+       else
+               tag = PAGECACHE_TAG_DIRTY;
  retry:
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag_pages_for_writeback(mapping, index, end);
        while (!done && !nr_to_write_done && (index <= end) &&
-              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                             PAGECACHE_TAG_DIRTY, min(end - index,
-                                 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
                unsigned i;
  
                scanned = 1;
@@@ -2551,6 -2556,7 +2556,6 @@@ int extent_write_full_page(struct exten
        };
        struct writeback_control wbc_writepages = {
                .sync_mode      = wbc->sync_mode,
 -              .older_than_this = NULL,
                .nr_to_write    = 64,
                .range_start    = page_offset(page) + PAGE_CACHE_SIZE,
                .range_end      = (loff_t)-1,
@@@ -2583,6 -2589,7 +2588,6 @@@ int extent_write_locked_range(struct ex
        };
        struct writeback_control wbc_writepages = {
                .sync_mode      = mode,
 -              .older_than_this = NULL,
                .nr_to_write    = nr_pages * 2,
                .range_start    = start,
                .range_end      = end + 1,
@@@ -3020,8 -3027,15 +3025,15 @@@ static struct extent_buffer *__alloc_ex
                return NULL;
        eb->start = start;
        eb->len = len;
-       spin_lock_init(&eb->lock);
-       init_waitqueue_head(&eb->lock_wq);
+       rwlock_init(&eb->lock);
+       atomic_set(&eb->write_locks, 0);
+       atomic_set(&eb->read_locks, 0);
+       atomic_set(&eb->blocking_readers, 0);
+       atomic_set(&eb->blocking_writers, 0);
+       atomic_set(&eb->spinning_readers, 0);
+       atomic_set(&eb->spinning_writers, 0);
+       init_waitqueue_head(&eb->write_lock_wq);
+       init_waitqueue_head(&eb->read_lock_wq);
  
  #if LEAK_DEBUG
        spin_lock_irqsave(&leak_lock, flags);
@@@ -3117,7 -3131,7 +3129,7 @@@ struct extent_buffer *alloc_extent_buff
                i = 0;
        }
        for (; i < num_pages; i++, index++) {
-               p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
+               p = find_or_create_page(mapping, index, GFP_NOFS);
                if (!p) {
                        WARN_ON(1);
                        goto free_eb;
@@@ -3264,6 -3278,22 +3276,22 @@@ int set_extent_buffer_dirty(struct exte
        return was_dirty;
  }
  
+ static int __eb_straddles_pages(u64 start, u64 len)
+ {
+       if (len < PAGE_CACHE_SIZE)
+               return 1;
+       if (start & (PAGE_CACHE_SIZE - 1))
+               return 1;
+       if ((start + len) & (PAGE_CACHE_SIZE - 1))
+               return 1;
+       return 0;
+ }
+ static int eb_straddles_pages(struct extent_buffer *eb)
+ {
+       return __eb_straddles_pages(eb->start, eb->len);
+ }
  int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
                                struct extent_buffer *eb,
                                struct extent_state **cached_state)
        num_pages = num_extent_pages(eb->start, eb->len);
        clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
  
-       clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                             cached_state, GFP_NOFS);
+       if (eb_straddles_pages(eb)) {
+               clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                                     cached_state, GFP_NOFS);
+       }
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if (page)
@@@ -3294,8 -3326,10 +3324,10 @@@ int set_extent_buffer_uptodate(struct e
  
        num_pages = num_extent_pages(eb->start, eb->len);
  
-       set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                           NULL, GFP_NOFS);
+       if (eb_straddles_pages(eb)) {
+               set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                                   NULL, GFP_NOFS);
+       }
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@@ -3318,9 -3352,12 +3350,12 @@@ int extent_range_uptodate(struct extent
        int uptodate;
        unsigned long index;
  
-       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
-       if (ret)
-               return 1;
+       if (__eb_straddles_pages(start, end - start + 1)) {
+               ret = test_range_bit(tree, start, end,
+                                    EXTENT_UPTODATE, 1, NULL);
+               if (ret)
+                       return 1;
+       }
        while (start <= end) {
                index = start >> PAGE_CACHE_SHIFT;
                page = find_get_page(tree->mapping, index);
@@@ -3348,10 -3385,12 +3383,12 @@@ int extent_buffer_uptodate(struct exten
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 1;
  
-       ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1, cached_state);
-       if (ret)
-               return ret;
+       if (eb_straddles_pages(eb)) {
+               ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                                  EXTENT_UPTODATE, 1, cached_state);
+               if (ret)
+                       return ret;
+       }
  
        num_pages = num_extent_pages(eb->start, eb->len);
        for (i = 0; i < num_pages; i++) {
@@@ -3384,9 -3423,11 +3421,11 @@@ int read_extent_buffer_pages(struct ext
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 0;
  
-       if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1, NULL)) {
-               return 0;
+       if (eb_straddles_pages(eb)) {
+               if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                                  EXTENT_UPTODATE, 1, NULL)) {
+                       return 0;
+               }
        }
  
        if (start) {
@@@ -3490,9 -3531,8 +3529,8 @@@ void read_extent_buffer(struct extent_b
                page = extent_buffer_page(eb, i);
  
                cur = min(len, (PAGE_CACHE_SIZE - offset));
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = page_address(page);
                memcpy(dst, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER1);
  
                dst += cur;
                len -= cur;
  }
  
  int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                              unsigned long min_len, char **token, char **map,
+                              unsigned long min_len, char **map,
                               unsigned long *map_start,
-                              unsigned long *map_len, int km)
+                              unsigned long *map_len)
  {
        size_t offset = start & (PAGE_CACHE_SIZE - 1);
        char *kaddr;
        }
  
        p = extent_buffer_page(eb, i);
-       kaddr = kmap_atomic(p, km);
-       *token = kaddr;
+       kaddr = page_address(p);
        *map = kaddr + offset;
        *map_len = PAGE_CACHE_SIZE - offset;
        return 0;
  }
  
- int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                     unsigned long min_len,
-                     char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km)
- {
-       int err;
-       int save = 0;
-       if (eb->map_token) {
-               unmap_extent_buffer(eb, eb->map_token, km);
-               eb->map_token = NULL;
-               save = 1;
-       }
-       err = map_private_extent_buffer(eb, start, min_len, token, map,
-                                      map_start, map_len, km);
-       if (!err && save) {
-               eb->map_token = *token;
-               eb->kaddr = *map;
-               eb->map_start = *map_start;
-               eb->map_len = *map_len;
-       }
-       return err;
- }
- void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
- {
-       kunmap_atomic(token, km);
- }
  int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
                          unsigned long start,
                          unsigned long len)
  
                cur = min(len, (PAGE_CACHE_SIZE - offset));
  
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                ret = memcmp(ptr, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
                if (ret)
                        break;
  
@@@ -3628,9 -3637,8 +3635,8 @@@ void write_extent_buffer(struct extent_
                WARN_ON(!PageUptodate(page));
  
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = page_address(page);
                memcpy(kaddr + offset, src, cur);
-               kunmap_atomic(kaddr, KM_USER1);
  
                src += cur;
                len -= cur;
@@@ -3659,9 -3667,8 +3665,8 @@@ void memset_extent_buffer(struct extent
                WARN_ON(!PageUptodate(page));
  
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                memset(kaddr + offset, c, cur);
-               kunmap_atomic(kaddr, KM_USER0);
  
                len -= cur;
                offset = 0;
@@@ -3692,9 -3699,8 +3697,8 @@@ void copy_extent_buffer(struct extent_b
  
                cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
  
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                read_extent_buffer(src, kaddr + offset, src_offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
  
                src_offset += cur;
                len -= cur;
@@@ -3707,20 -3713,17 +3711,17 @@@ static void move_pages(struct page *dst
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
  {
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *dst_kaddr = page_address(dst_page);
        if (dst_page == src_page) {
                memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
        } else {
-               char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+               char *src_kaddr = page_address(src_page);
                char *p = dst_kaddr + dst_off + len;
                char *s = src_kaddr + src_off + len;
  
                while (len--)
                        *--p = *--s;
-               kunmap_atomic(src_kaddr, KM_USER1);
        }
-       kunmap_atomic(dst_kaddr, KM_USER0);
  }
  
  static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@@ -3733,20 -3736,17 +3734,17 @@@ static void copy_pages(struct page *dst
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
  {
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *dst_kaddr = page_address(dst_page);
        char *src_kaddr;
  
        if (dst_page != src_page) {
-               src_kaddr = kmap_atomic(src_page, KM_USER1);
+               src_kaddr = page_address(src_page);
        } else {
                src_kaddr = dst_kaddr;
                BUG_ON(areas_overlap(src_off, dst_off, len));
        }
  
        memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-       kunmap_atomic(dst_kaddr, KM_USER0);
-       if (dst_page != src_page)
-               kunmap_atomic(src_kaddr, KM_USER1);
  }
  
  void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
diff --combined fs/btrfs/file.c
index 59cbdb120ad0917a001dd9c97d5a17999e6ce59e,6e56a468d1f51f0f595b5227cd8d9d11958f0d32..a35e51c9f235559c5c9a0f5f5ddea267e6c14a40
@@@ -1081,7 -1081,8 +1081,8 @@@ static noinline int prepare_pages(struc
  
  again:
        for (i = 0; i < num_pages; i++) {
-               pages[i] = grab_cache_page(inode->i_mapping, index + i);
+               pages[i] = find_or_create_page(inode->i_mapping, index + i,
+                                              GFP_NOFS);
                if (!pages[i]) {
                        faili = i - 1;
                        err = -ENOMEM;
@@@ -1238,9 -1239,11 +1239,11 @@@ static noinline ssize_t __btrfs_buffere
                 * managed to copy.
                 */
                if (num_pages > dirty_pages) {
-                       if (copied > 0)
-                               atomic_inc(
-                                       &BTRFS_I(inode)->outstanding_extents);
+                       if (copied > 0) {
+                               spin_lock(&BTRFS_I(inode)->lock);
+                               BTRFS_I(inode)->outstanding_extents++;
+                               spin_unlock(&BTRFS_I(inode)->lock);
+                       }
                        btrfs_delalloc_release_space(inode,
                                        (num_pages - dirty_pages) <<
                                        PAGE_CACHE_SHIFT);
@@@ -1452,7 -1455,7 +1455,7 @@@ int btrfs_release_file(struct inode *in
   * important optimization for directories because holding the mutex prevents
   * new operations on the dir while we write to disk.
   */
 -int btrfs_sync_file(struct file *file, int datasync)
 +int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
  {
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
  
        trace_btrfs_sync_file(file, datasync);
  
 +      ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 +      if (ret)
 +              return ret;
 +      mutex_lock(&inode->i_mutex);
 +
        /* we wait first, since the writeback may change the inode */
        root->log_batch++;
 -      /* the VFS called filemap_fdatawrite for us */
        btrfs_wait_ordered_range(inode, 0, (u64)-1);
        root->log_batch++;
  
         * check the transaction that last modified this inode
         * and see if its already been committed
         */
 -      if (!BTRFS_I(inode)->last_trans)
 +      if (!BTRFS_I(inode)->last_trans) {
 +              mutex_unlock(&inode->i_mutex);
                goto out;
 +      }
  
        /*
         * if the last transaction that changed this file was before
        if (BTRFS_I(inode)->last_trans <=
            root->fs_info->last_trans_committed) {
                BTRFS_I(inode)->last_trans = 0;
 +              mutex_unlock(&inode->i_mutex);
                goto out;
        }
  
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
 +              mutex_unlock(&inode->i_mutex);
                goto out;
        }
  
        ret = btrfs_log_dentry_safe(trans, root, dentry);
 -      if (ret < 0)
 +      if (ret < 0) {
 +              mutex_unlock(&inode->i_mutex);
                goto out;
 +      }
  
        /* we've logged all the items and now have a consistent
         * version of the file in the log.  It is possible that
         * file again, but that will end up using the synchronization
         * inside btrfs_sync_log to keep things safe.
         */
 -      mutex_unlock(&dentry->d_inode->i_mutex);
 +      mutex_unlock(&inode->i_mutex);
  
        if (ret != BTRFS_NO_LOG_SYNC) {
                if (ret > 0) {
        } else {
                ret = btrfs_end_transaction(trans, root);
        }
 -      mutex_lock(&dentry->d_inode->i_mutex);
  out:
        return ret > 0 ? -EIO : ret;
  }
        return ret;
  }
  
 +static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 +{      /* Returns 0 with *offset set to the position found, otherwise -ENXIO. */
 +      struct btrfs_root *root = BTRFS_I(inode)->root;
 +      struct extent_map *em;
 +      struct extent_state *cached_state = NULL;
 +      u64 lockstart = *offset;        /* locked range begins at the requested offset */
 +      u64 lockend = i_size_read(inode);
 +      u64 start = *offset;            /* current search position */
 +      u64 orig_start = *offset;       /* requested offset, kept for the vacancy check */
 +      u64 len = i_size_read(inode);
 +      u64 last_end = 0;               /* end of the previously examined extent */
 +      int ret = 0;
 +
 +      lockend = max_t(u64, root->sectorsize, lockend);
 +      if (lockend <= lockstart)       /* offset at/after i_size: lock one sector past it */
 +              lockend = lockstart + root->sectorsize;
 +
 +      len = lockend - lockstart + 1;
 +
 +      len = max_t(u64, len, root->sectorsize);
 +      if (inode->i_size == 0)         /* empty file: bail out before taking the extent lock */
 +              return -ENXIO;
 +
 +      lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
 +                       &cached_state, GFP_NOFS);
 +
 +      /*
 +       * Delalloc is such a pain.  If we have a hole and we have pending
 +       * delalloc for a portion of the hole we will get back a hole that
 +       * exists for the entire range since it hasn't been actually written
 +       * yet.  So to take care of this case we need to look for an extent just
 +       * before the position we want in case there is outstanding delalloc
 +       * going on here.
 +       */
 +      if (origin == SEEK_HOLE && start != 0) {
 +              if (start <= root->sectorsize)  /* start - sectorsize would not be positive: probe from 0 */
 +                      em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
 +                                                   root->sectorsize, 0);
 +              else
 +                      em = btrfs_get_extent_fiemap(inode, NULL, 0,
 +                                                   start - root->sectorsize,
 +                                                   root->sectorsize, 0);
 +              if (IS_ERR(em)) {
 +                      ret = -ENXIO;   /* any lookup error is reported as -ENXIO */
 +                      goto out;
 +              }
 +              last_end = em->start + em->len;
 +              if (em->block_start == EXTENT_MAP_DELALLOC)
 +                      last_end = min_t(u64, last_end, inode->i_size); /* clamp to i_size */
 +              free_extent_map(em);
 +      }
 +
 +      while (1) {     /* walk extent maps forward from 'start' until a match or a vacancy */
 +              em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
 +              if (IS_ERR(em)) {
 +                      ret = -ENXIO;   /* any lookup error is reported as -ENXIO */
 +                      break;
 +              }
 +
 +              if (em->block_start == EXTENT_MAP_HOLE) {
 +                      if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {        /* NOTE(review): VACANCY presumably marks the area past the last extent - confirm */
 +                              if (last_end <= orig_start) {
 +                                      free_extent_map(em);
 +                                      ret = -ENXIO;
 +                                      break;
 +                              }
 +                      }
 +
 +                      if (origin == SEEK_HOLE) {      /* hole found at 'start' */
 +                              *offset = start;
 +                              free_extent_map(em);
 +                              break;
 +                      }
 +              } else {
 +                      if (origin == SEEK_DATA) {
 +                              if (em->block_start == EXTENT_MAP_DELALLOC) {
 +                                      if (start >= inode->i_size) {   /* delalloc at/after i_size is not reported as data */
 +                                              free_extent_map(em);
 +                                              ret = -ENXIO;
 +                                              break;
 +                                      }
 +                              }
 +
 +                              *offset = start;        /* data found at 'start' */
 +                              free_extent_map(em);
 +                              break;
 +                      }
 +              }
 +
 +              start = em->start + em->len;    /* no match: continue from the end of this extent */
 +              last_end = em->start + em->len;
 +
 +              if (em->block_start == EXTENT_MAP_DELALLOC)
 +                      last_end = min_t(u64, last_end, inode->i_size); /* clamp to i_size */
 +
 +              if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {        /* nothing further to examine */
 +                      free_extent_map(em);
 +                      ret = -ENXIO;
 +                      break;
 +              }
 +              free_extent_map(em);
 +              cond_resched();
 +      }
 +      if (!ret)
 +              *offset = min(*offset, inode->i_size);  /* never report a position beyond i_size */
 +out:
 +      unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
 +                           &cached_state, GFP_NOFS);
 +      return ret;
 +}
 +
 +static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
 +{
 +      struct inode *inode = file->f_mapping->host;
 +      int ret;
 +
 +      mutex_lock(&inode->i_mutex);
 +      switch (origin) {
 +      case SEEK_END:
 +      case SEEK_CUR:
 +              offset = generic_file_llseek_unlocked(file, offset, origin);
 +              goto out;
 +      case SEEK_DATA:
 +      case SEEK_HOLE:
 +              ret = find_desired_extent(inode, &offset, origin);
 +              if (ret) {
 +                      mutex_unlock(&inode->i_mutex);
 +                      return ret;
 +              }
 +      }
 +
 +      if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
 +              return -EINVAL;
 +      if (offset > inode->i_sb->s_maxbytes)
 +              return -EINVAL;
 +
 +      /* Special lock needed here? */
 +      if (offset != file->f_pos) {
 +              file->f_pos = offset;
 +              file->f_version = 0;
 +      }
 +out:
 +      mutex_unlock(&inode->i_mutex);
 +      return offset;
 +}
 +
  const struct file_operations btrfs_file_operations = {
 -      .llseek         = generic_file_llseek,
 +      .llseek         = btrfs_file_llseek,
        .read           = do_sync_read,
        .write          = do_sync_write,
        .aio_read       = generic_file_aio_read,
diff --combined fs/btrfs/inode.c
index caa26ab5ed6833094a3334ed388edca18d2fd15a,586cf6a438559e996e4bc7856b4bfe5a15647330..13e6255182e3dad59af5463537dbc21abdff4bef
@@@ -750,15 -750,6 +750,6 @@@ static u64 get_extent_allocation_hint(s
        return alloc_hint;
  }
  
- static inline bool is_free_space_inode(struct btrfs_root *root,
-                                      struct inode *inode)
- {
-       if (root == root->fs_info->tree_root ||
-           BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
-               return true;
-       return false;
- }
  /*
   * when extent_io.c finds a delayed allocation range in the file,
   * the call backs end up in this code.  The basic idea is to
@@@ -791,7 -782,7 +782,7 @@@ static noinline int cow_file_range(stru
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
  
-       BUG_ON(is_free_space_inode(root, inode));
+       BUG_ON(btrfs_is_free_space_inode(root, inode));
        trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@@ -1072,7 -1063,7 +1063,7 @@@ static noinline int run_delalloc_nocow(
        path = btrfs_alloc_path();
        BUG_ON(!path);
  
-       nolock = is_free_space_inode(root, inode);
+       nolock = btrfs_is_free_space_inode(root, inode);
  
        if (nolock)
                trans = btrfs_join_transaction_nolock(root);
@@@ -1298,7 -1289,9 +1289,9 @@@ static int btrfs_split_extent_hook(stru
        if (!(orig->state & EXTENT_DELALLOC))
                return 0;
  
-       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents++;
+       spin_unlock(&BTRFS_I(inode)->lock);
        return 0;
  }
  
@@@ -1316,7 -1309,9 +1309,9 @@@ static int btrfs_merge_extent_hook(stru
        if (!(other->state & EXTENT_DELALLOC))
                return 0;
  
-       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents--;
+       spin_unlock(&BTRFS_I(inode)->lock);
        return 0;
  }
  
@@@ -1337,12 -1332,15 +1332,15 @@@ static int btrfs_set_bit_hook(struct in
        if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               bool do_list = !is_free_space_inode(root, inode);
+               bool do_list = !btrfs_is_free_space_inode(root, inode);
  
-               if (*bits & EXTENT_FIRST_DELALLOC)
+               if (*bits & EXTENT_FIRST_DELALLOC) {
                        *bits &= ~EXTENT_FIRST_DELALLOC;
-               else
-                       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+               } else {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents++;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+               }
  
                spin_lock(&root->fs_info->delalloc_lock);
                BTRFS_I(inode)->delalloc_bytes += len;
@@@ -1370,12 -1368,15 +1368,15 @@@ static int btrfs_clear_bit_hook(struct 
        if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               bool do_list = !is_free_space_inode(root, inode);
+               bool do_list = !btrfs_is_free_space_inode(root, inode);
  
-               if (*bits & EXTENT_FIRST_DELALLOC)
+               if (*bits & EXTENT_FIRST_DELALLOC) {
                        *bits &= ~EXTENT_FIRST_DELALLOC;
-               else if (!(*bits & EXTENT_DO_ACCOUNTING))
-                       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+               } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents--;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+               }
  
                if (*bits & EXTENT_DO_ACCOUNTING)
                        btrfs_delalloc_release_metadata(inode, len);
@@@ -1477,7 -1478,7 +1478,7 @@@ static int btrfs_submit_bio_hook(struc
  
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
  
-       if (is_free_space_inode(root, inode))
+       if (btrfs_is_free_space_inode(root, inode))
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
        else
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@@ -1726,7 -1727,7 +1727,7 @@@ static int btrfs_finish_ordered_io(stru
                return 0;
        BUG_ON(!ordered_extent);
  
-       nolock = is_free_space_inode(root, inode);
+       nolock = btrfs_is_free_space_inode(root, inode);
  
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list));
@@@ -2531,13 -2532,6 +2532,6 @@@ static void btrfs_read_locked_inode(str
  
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
-       if (!leaf->map_token)
-               map_private_extent_buffer(leaf, (unsigned long)inode_item,
-                                         sizeof(struct btrfs_inode_item),
-                                         &leaf->map_token, &leaf->kaddr,
-                                         &leaf->map_start, &leaf->map_len,
-                                         KM_USER1);
        inode->i_mode = btrfs_inode_mode(leaf, inode_item);
        inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
        inode->i_uid = btrfs_inode_uid(leaf, inode_item);
@@@ -2575,11 -2569,6 +2569,6 @@@ cache_acl
        if (!maybe_acls)
                cache_no_acl(inode);
  
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
        btrfs_free_path(path);
  
        switch (inode->i_mode & S_IFMT) {
@@@ -2624,13 -2613,6 +2613,6 @@@ static void fill_inode_item(struct btrf
                            struct btrfs_inode_item *item,
                            struct inode *inode)
  {
-       if (!leaf->map_token)
-               map_private_extent_buffer(leaf, (unsigned long)item,
-                                         sizeof(struct btrfs_inode_item),
-                                         &leaf->map_token, &leaf->kaddr,
-                                         &leaf->map_start, &leaf->map_len,
-                                         KM_USER1);
        btrfs_set_inode_uid(leaf, item, inode->i_uid);
        btrfs_set_inode_gid(leaf, item, inode->i_gid);
        btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
        btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
        btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
        btrfs_set_inode_block_group(leaf, item, 0);
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
  }
  
  /*
@@@ -2684,7 -2661,7 +2661,7 @@@ noinline int btrfs_update_inode(struct 
         * The data relocation inode should also be directly updated
         * without delay
         */
-       if (!is_free_space_inode(root, inode)
+       if (!btrfs_is_free_space_inode(root, inode)
            && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
                ret = btrfs_delayed_update_inode(trans, root, inode);
                if (!ret)
@@@ -3398,7 -3375,7 +3375,7 @@@ static int btrfs_truncate_page(struct a
  
        ret = -ENOMEM;
  again:
-       page = grab_cache_page(mapping, index);
+       page = find_or_create_page(mapping, index, GFP_NOFS);
        if (!page) {
                btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
                goto out;
@@@ -3634,7 -3611,7 +3611,7 @@@ void btrfs_evict_inode(struct inode *in
  
        truncate_inode_pages(&inode->i_data, 0);
        if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
-                              is_free_space_inode(root, inode)))
+                              btrfs_is_free_space_inode(root, inode)))
                goto no_delete;
  
        if (is_bad_inode(inode)) {
@@@ -4079,7 -4056,13 +4056,7 @@@ static int btrfs_dentry_delete(const st
  static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
                                   struct nameidata *nd)
  {
 -      struct inode *inode;
 -
 -      inode = btrfs_lookup_dentry(dir, dentry);
 -      if (IS_ERR(inode))
 -              return ERR_CAST(inode);
 -
 -      return d_splice_alias(inode, dentry);
 +      return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
  }
  
  unsigned char btrfs_filetype_table[] = {
@@@ -4271,7 -4254,7 +4248,7 @@@ int btrfs_write_inode(struct inode *ino
        if (BTRFS_I(inode)->dummy_inode)
                return 0;
  
-       if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
+       if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
                nolock = true;
  
        if (wbc->sync_mode == WB_SYNC_ALL) {
@@@ -4467,7 -4450,7 +4444,7 @@@ static struct inode *btrfs_new_inode(st
        inode->i_generation = BTRFS_I(inode)->generation;
        btrfs_set_inode_space_info(root, inode);
  
 -      if (mode & S_IFDIR)
 +      if (S_ISDIR(mode))
                owner = 0;
        else
                owner = 1;
  
        btrfs_inherit_iflags(inode, dir);
  
 -      if ((mode & S_IFREG)) {
 +      if (S_ISREG(mode)) {
                if (btrfs_test_opt(root, NODATASUM))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
                if (btrfs_test_opt(root, NODATACOW) ||
@@@ -4766,10 -4749,11 +4743,10 @@@ static int btrfs_link(struct dentry *ol
        if (err) {
                drop_inode = 1;
        } else {
 -              struct dentry *parent = dget_parent(dentry);
 +              struct dentry *parent = dentry->d_parent;
                err = btrfs_update_inode(trans, root, inode);
                BUG_ON(err);
                btrfs_log_new_name(trans, inode, NULL, parent);
 -              dput(parent);
        }
  
        nr = trans->blocks_used;
@@@ -6728,8 -6712,9 +6705,9 @@@ struct inode *btrfs_alloc_inode(struct 
        ei->index_cnt = (u64)-1;
        ei->last_unlink_trans = 0;
  
-       atomic_set(&ei->outstanding_extents, 0);
-       atomic_set(&ei->reserved_extents, 0);
+       spin_lock_init(&ei->lock);
+       ei->outstanding_extents = 0;
+       ei->reserved_extents = 0;
  
        ei->ordered_data_close = 0;
        ei->orphan_meta_reserved = 0;
@@@ -6767,8 -6752,8 +6745,8 @@@ void btrfs_destroy_inode(struct inode *
  
        WARN_ON(!list_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
-       WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
-       WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
+       WARN_ON(BTRFS_I(inode)->outstanding_extents);
+       WARN_ON(BTRFS_I(inode)->reserved_extents);
  
        /*
         * This can happen where we create an inode, but somebody else also
@@@ -6823,7 -6808,7 +6801,7 @@@ int btrfs_drop_inode(struct inode *inod
        struct btrfs_root *root = BTRFS_I(inode)->root;
  
        if (btrfs_root_refs(&root->root_item) == 0 &&
-           !is_free_space_inode(root, inode))
+           !btrfs_is_free_space_inode(root, inode))
                return 1;
        else
                return generic_drop_inode(inode);
@@@ -6893,7 -6878,7 +6871,7 @@@ static int btrfs_getattr(struct vfsmoun
  {
        struct inode *inode = dentry->d_inode;
        generic_fillattr(inode, stat);
 -      stat->dev = BTRFS_I(inode)->root->anon_super.s_dev;
 +      stat->dev = BTRFS_I(inode)->root->anon_dev;
        stat->blksize = PAGE_CACHE_SIZE;
        stat->blocks = (inode_get_bytes(inode) +
                        BTRFS_I(inode)->delalloc_bytes) >> 9;
@@@ -7061,8 -7046,9 +7039,8 @@@ static int btrfs_rename(struct inode *o
        BUG_ON(ret);
  
        if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
 -              struct dentry *parent = dget_parent(new_dentry);
 +              struct dentry *parent = new_dentry->d_parent;
                btrfs_log_new_name(trans, old_inode, old_dir, parent);
 -              dput(parent);
                btrfs_end_log_trans(root);
        }
  out_fail:
@@@ -7323,7 -7309,7 +7301,7 @@@ static int btrfs_set_page_dirty(struct 
        return __set_page_dirty_nobuffers(page);
  }
  
 -static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
 +static int btrfs_permission(struct inode *inode, int mask)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
  
                return -EROFS;
        if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
                return -EACCES;
 -      return generic_permission(inode, mask, flags, btrfs_check_acl);
 +      return generic_permission(inode, mask);
  }
  
  static const struct inode_operations btrfs_dir_inode_operations = {
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
 +      .get_acl        = btrfs_get_acl,
  };
  static const struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
        .permission     = btrfs_permission,
 +      .get_acl        = btrfs_get_acl,
  };
  
  static const struct file_operations btrfs_dir_file_operations = {
@@@ -7425,7 -7409,6 +7403,7 @@@ static const struct inode_operations bt
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
        .fiemap         = btrfs_fiemap,
 +      .get_acl        = btrfs_get_acl,
  };
  static const struct inode_operations btrfs_special_inode_operations = {
        .getattr        = btrfs_getattr,
        .getxattr       = btrfs_getxattr,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
 +      .get_acl        = btrfs_get_acl,
  };
  static const struct inode_operations btrfs_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .getxattr       = btrfs_getxattr,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
 +      .get_acl        = btrfs_get_acl,
  };
  
  const struct dentry_operations btrfs_dentry_operations = {
diff --combined fs/btrfs/ioctl.c
index 622543309eb25e86fb39308ce077b84b4a883c70,fd252fff4c6666d19ebc53553c96d17d8567d37a..0b980afc5eddfd4aeec80162e2eebecadd4015de
@@@ -323,7 -323,7 +323,7 @@@ static noinline int create_subvol(struc
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *leaf;
        struct btrfs_root *new_root;
 -      struct dentry *parent = dget_parent(dentry);
 +      struct dentry *parent = dentry->d_parent;
        struct inode *dir;
        int ret;
        int err;
        u64 index = 0;
  
        ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
 -      if (ret) {
 -              dput(parent);
 +      if (ret)
                return ret;
 -      }
  
        dir = parent->d_inode;
  
         * 2 - dir items
         */
        trans = btrfs_start_transaction(root, 6);
 -      if (IS_ERR(trans)) {
 -              dput(parent);
 +      if (IS_ERR(trans))
                return PTR_ERR(trans);
 -      }
  
        leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
                                      0, objectid, NULL, 0, 0, 0);
  
        d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
  fail:
 -      dput(parent);
        if (async_transid) {
                *async_transid = trans->transid;
                err = btrfs_commit_transaction_async(trans, root, 1);
@@@ -451,6 -456,7 +451,6 @@@ static int create_snapshot(struct btrfs
                           bool readonly)
  {
        struct inode *inode;
 -      struct dentry *parent;
        struct btrfs_pending_snapshot *pending_snapshot;
        struct btrfs_trans_handle *trans;
        int ret;
        if (ret)
                goto fail;
  
 -      parent = dget_parent(dentry);
 -      inode = btrfs_lookup_dentry(parent->d_inode, dentry);
 -      dput(parent);
 +      inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
        if (IS_ERR(inode)) {
                ret = PTR_ERR(inode);
                goto fail;
@@@ -859,8 -867,8 +859,8 @@@ again
        /* step one, lock all the pages */
        for (i = 0; i < num_pages; i++) {
                struct page *page;
-               page = grab_cache_page(inode->i_mapping,
-                                           start_index + i);
+               page = find_or_create_page(inode->i_mapping,
+                                           start_index + i, GFP_NOFS);
                if (!page)
                        break;
  
                          GFP_NOFS);
  
        if (i_done != num_pages) {
-               atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+               spin_lock(&BTRFS_I(inode)->lock);
+               BTRFS_I(inode)->outstanding_extents++;
+               spin_unlock(&BTRFS_I(inode)->lock);
                btrfs_delalloc_release_space(inode,
                                     (num_pages - i_done) << PAGE_CACHE_SHIFT);
        }