nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - mm/shmem.c
mm: vmscan: fix force-scanning small targets without swap
[linux-2.6.git] / mm / shmem.c
index 13ef2d7e912df21ea69316a0ef34371ca106cb27..32f6763f16fb82ad7068c4d73479a96583f4ef15 100644 (file)
@@ -6,7 +6,8 @@
  *              2000-2001 Christoph Rohland
  *              2000-2001 SAP AG
  *              2002 Red Hat Inc.
- * Copyright (C) 2002-2005 Hugh Dickins.
+ * Copyright (C) 2002-2011 Hugh Dickins.
+ * Copyright (C) 2011 Google Inc.
  * Copyright (C) 2002-2005 VERITAS Software Corporation.
  * Copyright (C) 2004 Andi Kleen, SuSE Labs
  *
@@ -72,6 +73,9 @@ static struct vfsmount *shm_mnt;
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
 
+/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
+#define SHORT_SYMLINK_LEN 128
+
 struct shmem_xattr {
        struct list_head list;  /* anchored by shmem_inode_info->xattr_list */
        char *name;             /* xattr name */
@@ -219,19 +223,6 @@ static void shmem_recalc_inode(struct inode *inode)
        }
 }
 
-static void shmem_put_swap(struct shmem_inode_info *info, pgoff_t index,
-                          swp_entry_t swap)
-{
-       if (index < SHMEM_NR_DIRECT)
-               info->i_direct[index] = swap;
-}
-
-static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index)
-{
-       return (index < SHMEM_NR_DIRECT) ?
-               info->i_direct[index] : (swp_entry_t){0};
-}
-
 /*
  * Replace item expected in radix tree by a new item, while holding tree lock.
  */
@@ -299,6 +290,25 @@ static int shmem_add_to_page_cache(struct page *page,
        return error;
 }
 
+/*
+ * Like delete_from_page_cache, but substitutes radswap (swap entry) for page.
+ */
+static void shmem_delete_from_page_cache(struct page *page, void *radswap)
+{
+       struct address_space *mapping = page->mapping;
+       int error;
+
+       spin_lock_irq(&mapping->tree_lock);
+       error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
+       page->mapping = NULL;
+       mapping->nrpages--;
+       __dec_zone_page_state(page, NR_FILE_PAGES);
+       __dec_zone_page_state(page, NR_SHMEM);
+       spin_unlock_irq(&mapping->tree_lock);
+       page_cache_release(page);
+       BUG_ON(error);  /* checked only after tree_lock has been dropped */
+}
+
 /*
  * Like find_get_pages, but collecting swap entries as well as pages.
  */
@@ -322,10 +332,14 @@ repeat:
                if (unlikely(!page))
                        continue;
                if (radix_tree_exception(page)) {
-                       if (radix_tree_exceptional_entry(page))
-                               goto export;
-                       /* radix_tree_deref_retry(page) */
-                       goto restart;
+                       if (radix_tree_deref_retry(page))
+                               goto restart;
+                       /*
+                        * Otherwise, we must be storing a swap entry
+                        * here as an exceptional entry: so return it
+                        * without attempting to raise page count.
+                        */
+                       goto export;
                }
                if (!page_cache_get_speculative(page))
                        goto repeat;
@@ -346,42 +360,6 @@ export:
        return ret;
 }
 
-/*
- * Lockless lookup of swap entry in radix tree, avoiding refcount on pages.
- */
-static pgoff_t shmem_find_swap(struct address_space *mapping, void *radswap)
-{
-       void  **slots[PAGEVEC_SIZE];
-       pgoff_t indices[PAGEVEC_SIZE];
-       unsigned int nr_found;
-
-restart:
-       nr_found = 1;
-       indices[0] = -1;
-       while (nr_found) {
-               pgoff_t index = indices[nr_found - 1] + 1;
-               unsigned int i;
-
-               rcu_read_lock();
-               nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-                                       slots, indices, index, PAGEVEC_SIZE);
-               for (i = 0; i < nr_found; i++) {
-                       void *item = radix_tree_deref_slot(slots[i]);
-                       if (radix_tree_deref_retry(item)) {
-                               rcu_read_unlock();
-                               goto restart;
-                       }
-                       if (item == radswap) {
-                               rcu_read_unlock();
-                               return indices[i];
-                       }
-               }
-               rcu_read_unlock();
-               cond_resched();
-       }
-       return -1;
-}
-
 /*
  * Remove swap entry from radix tree, free the swap and its page cache.
  */
@@ -578,7 +556,8 @@ static void shmem_evict_inode(struct inode *inode)
                        list_del_init(&info->swaplist);
                        mutex_unlock(&shmem_swaplist_mutex);
                }
-       }
+       } else
+               kfree(info->symlink);
 
        list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
                kfree(xattr->name);
@@ -601,7 +580,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
        int error;
 
        radswap = swp_to_radix_entry(swap);
-       index = shmem_find_swap(mapping, radswap);
+       index = radix_tree_locate_item(&mapping->page_tree, radswap);
        if (index == -1)
                return 0;
 
@@ -664,14 +643,10 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
        mutex_lock(&shmem_swaplist_mutex);
        list_for_each_safe(this, next, &shmem_swaplist) {
                info = list_entry(this, struct shmem_inode_info, swaplist);
-               if (!info->swapped) {
-                       spin_lock(&info->lock);
-                       if (!info->swapped)
-                               list_del_init(&info->swaplist);
-                       spin_unlock(&info->lock);
-               }
                if (info->swapped)
                        found = shmem_unuse_inode(info, swap, page);
+               else
+                       list_del_init(&info->swaplist);
                cond_resched();
                if (found)
                        break;
@@ -694,10 +669,10 @@ out:
 static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 {
        struct shmem_inode_info *info;
-       swp_entry_t swap, oswap;
        struct address_space *mapping;
-       pgoff_t index;
        struct inode *inode;
+       swp_entry_t swap;
+       pgoff_t index;
 
        BUG_ON(!PageLocked(page));
        mapping = page->mapping;
@@ -720,55 +695,38 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
                WARN_ON_ONCE(1);        /* Still happens? Tell us about it! */
                goto redirty;
        }
-
-       /*
-        * Disable even the toy swapping implementation, while we convert
-        * functions one by one to having swap entries in the radix tree.
-        */
-       if (index < ULONG_MAX)
-               goto redirty;
-
        swap = get_swap_page();
        if (!swap.val)
                goto redirty;
 
        /*
         * Add inode to shmem_unuse()'s list of swapped-out inodes,
-        * if it's not already there.  Do it now because we cannot take
-        * mutex while holding spinlock, and must do so before the page
-        * is moved to swap cache, when its pagelock no longer protects
+        * if it's not already there.  Do it now before the page is
+        * moved to swap cache, when its pagelock no longer protects
         * the inode from eviction.  But don't unlock the mutex until
-        * we've taken the spinlock, because shmem_unuse_inode() will
-        * prune a !swapped inode from the swaplist under both locks.
+        * we've incremented swapped, because shmem_unuse() will prune
+        * a !swapped inode from the swaplist under this mutex.
         */
        mutex_lock(&shmem_swaplist_mutex);
        if (list_empty(&info->swaplist))
                list_add_tail(&info->swaplist, &shmem_swaplist);
 
-       spin_lock(&info->lock);
-       mutex_unlock(&shmem_swaplist_mutex);
-
-       oswap = shmem_get_swap(info, index);
-       if (oswap.val) {
-               WARN_ON_ONCE(1);        /* Still happens? Tell us about it! */
-               free_swap_and_cache(oswap);
-               shmem_put_swap(info, index, (swp_entry_t){0});
-               info->swapped--;
-       }
-       shmem_recalc_inode(inode);
-
        if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
-               delete_from_page_cache(page);
-               shmem_put_swap(info, index, swap);
-               info->swapped++;
                swap_shmem_alloc(swap);
+               shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+
+               spin_lock(&info->lock);
+               info->swapped++;
+               shmem_recalc_inode(inode);
                spin_unlock(&info->lock);
+
+               mutex_unlock(&shmem_swaplist_mutex);
                BUG_ON(page_mapped(page));
                swap_writepage(page, wbc);
                return 0;
        }
 
-       spin_unlock(&info->lock);
+       mutex_unlock(&shmem_swaplist_mutex);
        swapcache_free(swap, NULL);
 redirty:
        set_page_dirty(page);
@@ -1187,7 +1145,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 
 #ifdef CONFIG_TMPFS
 static const struct inode_operations shmem_symlink_inode_operations;
-static const struct inode_operations shmem_symlink_inline_operations;
+static const struct inode_operations shmem_short_symlink_operations;
 
 static int
 shmem_write_begin(struct file *file, struct address_space *mapping,
@@ -1652,10 +1610,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
 
        info = SHMEM_I(inode);
        inode->i_size = len-1;
-       if (len <= SHMEM_SYMLINK_INLINE_LEN) {
-               /* do it inline */
-               memcpy(info->inline_symlink, symname, len);
-               inode->i_op = &shmem_symlink_inline_operations;
+       if (len <= SHORT_SYMLINK_LEN) {
+               info->symlink = kmemdup(symname, len, GFP_KERNEL);
+               if (!info->symlink) {
+                       iput(inode);
+                       return -ENOMEM;
+               }
+               inode->i_op = &shmem_short_symlink_operations;
        } else {
                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
                if (error) {
@@ -1678,9 +1639,9 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
        return 0;
 }
 
-static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
+static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
 {
-       nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink);
+       nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
        return NULL;
 }
 
@@ -1928,9 +1889,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
 }
 #endif /* CONFIG_TMPFS_XATTR */
 
-static const struct inode_operations shmem_symlink_inline_operations = {
+static const struct inode_operations shmem_short_symlink_operations = {
        .readlink       = generic_readlink,
-       .follow_link    = shmem_follow_link_inline,
+       .follow_link    = shmem_follow_short_symlink,
 #ifdef CONFIG_TMPFS_XATTR
        .setxattr       = shmem_setxattr,
        .getxattr       = shmem_getxattr,
@@ -2273,10 +2234,8 @@ static void shmem_destroy_callback(struct rcu_head *head)
 
 static void shmem_destroy_inode(struct inode *inode)
 {
-       if ((inode->i_mode & S_IFMT) == S_IFREG) {
-               /* only struct inode is valid if it's an inline symlink */
+       if ((inode->i_mode & S_IFMT) == S_IFREG)
                mpol_free_shared_policy(&SHMEM_I(inode)->policy);
-       }
        call_rcu(&inode->i_rcu, shmem_destroy_callback);
 }