mm: fix mbind vma merge problem
diff --git a/mm/swapfile.c b/mm/swapfile.c
index cc5e7ebf2d2c5105328688045080bcc6ce5a983f..187a21f8b7bdf2d4d5b99b8c51927c56e7ee80d7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -22,6 +22,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
 #include <linux/backing-dev.h>
@@ -38,6 +39,7 @@
 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
                                 unsigned char);
 static void free_swap_count_continuations(struct swap_info_struct *);
+static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 
 static DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
@@ -548,6 +550,12 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
        if (usage == SWAP_HAS_CACHE) {
                VM_BUG_ON(!has_cache);
                has_cache = 0;
+       } else if (count == SWAP_MAP_SHMEM) {
+               /*
+                * Or we could insist on shmem.c using a special
+                * swap_shmem_free() and free_shmem_swap_and_cache()...
+                */
+               count = 0;
        } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
                if (count == COUNT_CONTINUED) {
                        if (swap_count_continued(p, offset, count))
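swap_entry_free() above is pattern-matching on the one-byte usage count kept per swap slot. A standalone sketch of that encoding, with constant values copied from this generation's include/linux/swap.h (an assumption about the exact tree under review):

#include <stdio.h>

#define SWAP_MAP_MAX    0x3e    /* max duplication count in first swap_map */
#define SWAP_MAP_BAD    0x3f    /* slot sits on a bad page */
#define SWAP_HAS_CACHE  0x40    /* page is present in swap cache */
#define COUNT_CONTINUED 0x80    /* count continues in a continuation page */
#define SWAP_MAP_SHMEM  0xbf    /* slot is owned by shmem/tmpfs */

static unsigned char swap_count(unsigned char ent)
{
        return ent & ~SWAP_HAS_CACHE;   /* strip cache flag, keep count */
}

int main(void)
{
        unsigned char ent = SWAP_MAP_SHMEM | SWAP_HAS_CACHE;

        /* shmem-owned slots carry the single 0xbf marker, never a count */
        printf("count %#x, cached %d\n",
               (unsigned)swap_count(ent), !!(ent & SWAP_HAS_CACHE));
        return 0;
}

The single-marker scheme is why the new branch can simply zero the count: tmpfs never duplicates its reference, so there is nothing to decrement.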
@@ -643,6 +651,8 @@ int reuse_swap_page(struct page *page)
        int count;
 
        VM_BUG_ON(!PageLocked(page));
+       if (unlikely(PageKsm(page)))
+               return 0;
        count = page_mapcount(page);
        if (count <= 1 && PageSwapCache(page)) {
                count += page_swapcount(page);
@@ -651,7 +661,7 @@ int reuse_swap_page(struct page *page)
                        SetPageDirty(page);
                }
        }
-       return count == 1;
+       return count <= 1;
 }
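Two behavioural points fall out of this hunk: a KSM page is content-deduplicated across address spaces, so a write must always break COW into a fresh copy rather than reuse the page in place; and exclusivity is otherwise judged by the combined map count plus swap count being at most one. A hedged distillation (the demo_* name is hypothetical; locking, swapcache trimming and the dirty-bit handling above are deliberately elided):

/* Distilled form of the answer do_wp_page() gets back from
 * reuse_swap_page(). */
static int demo_can_reuse_in_place(int is_ksm, int mapcount, int swapcount)
{
        if (is_ksm)
                return 0;       /* always copy-on-write a KSM page */
        return mapcount + swapcount <= 1;       /* exclusive reference? */
}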
 
 /*
@@ -776,7 +786,7 @@ sector_t swapdev_block(int type, pgoff_t offset)
                return 0;
        if (!(swap_info[type]->flags & SWP_WRITEOK))
                return 0;
-       return map_swap_page(swp_entry(type, offset), &bdev);
+       return map_swap_entry(swp_entry(type, offset), &bdev);
 }
 
 /*
@@ -830,7 +840,8 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
                goto out;
        }
 
-       inc_mm_counter(vma->vm_mm, anon_rss);
+       dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
+       inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
        get_page(page);
        set_pte_at(vma->vm_mm, addr, pte,
                   pte_mkold(mk_pte(page, vma->vm_page_prot)));
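The rss bookkeeping here reflects the split of the old anon_rss into per-type counters: swapping a page back in converts one MM_SWAPENTS unit into one MM_ANONPAGES unit, leaving the task's combined anon-plus-swap footprint unchanged. A minimal sketch of that invariant with hypothetical demo types (not the kernel's mm_counter machinery):

enum { DEMO_ANONPAGES, DEMO_SWAPENTS, DEMO_NR_COUNTERS };

struct demo_mm {
        long counter[DEMO_NR_COUNTERS];
};

static void demo_account_swapin(struct demo_mm *mm)
{
        mm->counter[DEMO_SWAPENTS]--;   /* one swap slot reference gone */
        mm->counter[DEMO_ANONPAGES]++;  /* one resident anon page gained */
}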
@@ -931,7 +942,7 @@ static int unuse_vma(struct vm_area_struct *vma,
        unsigned long addr, end, next;
        int ret;
 
-       if (page->mapping) {
+       if (page_anon_vma(page)) {
                addr = page_address_in_vma(page, vma);
                if (addr == -EFAULT)
                        return 0;
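page->mapping alone no longer answers "is this an ordinary anon page?", because KSM overloads the pointer's low bits; page_anon_vma() yields the anon_vma only when the pure-anon encoding is present, so KSM pages take the exhaustive per-VMA search instead. A sketch of that low-bit encoding, with values copied from this generation's headers (an assumption about the exact tree):

#define PAGE_MAPPING_ANON       1       /* mapping points at an anon_vma */
#define PAGE_MAPPING_KSM        2       /* set with ANON: KSM stable-tree node */
#define PAGE_MAPPING_FLAGS      (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM)

static int demo_is_plain_anon(unsigned long mapping)
{
        /* KSM pages carry both bits, so they fail this test */
        return (mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_ANON;
}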
@@ -1031,7 +1042,6 @@ static int try_to_unuse(unsigned int type)
        swp_entry_t entry;
        unsigned int i = 0;
        int retval = 0;
-       int shmem;
 
        /*
         * When searching mms for an entry, a good strategy is to
@@ -1107,17 +1117,18 @@ static int try_to_unuse(unsigned int type)
 
                /*
                 * Remove all references to entry.
-                * Whenever we reach init_mm, there's no address space
-                * to search, but use it as a reminder to search shmem.
                 */
-               shmem = 0;
                swcount = *swap_map;
-               if (swap_count(swcount)) {
-                       if (start_mm == &init_mm)
-                               shmem = shmem_unuse(entry, page);
-                       else
-                               retval = unuse_mm(start_mm, entry, page);
+               if (swap_count(swcount) == SWAP_MAP_SHMEM) {
+                       retval = shmem_unuse(entry, page);
+                       /* page has already been unlocked and released */
+                       if (retval < 0)
+                               break;
+                       continue;
                }
+               if (swap_count(swcount) && start_mm != &init_mm)
+                       retval = unuse_mm(start_mm, entry, page);
+
                if (swap_count(*swap_map)) {
                        int set_start_mm = (*swap_map >= swcount);
                        struct list_head *p = &start_mm->mmlist;
@@ -1128,7 +1139,7 @@ static int try_to_unuse(unsigned int type)
                        atomic_inc(&new_start_mm->mm_users);
                        atomic_inc(&prev_mm->mm_users);
                        spin_lock(&mmlist_lock);
-                       while (swap_count(*swap_map) && !retval && !shmem &&
+                       while (swap_count(*swap_map) && !retval &&
                                        (p = p->next) != &start_mm->mmlist) {
                                mm = list_entry(p, struct mm_struct, mmlist);
                                if (!atomic_inc_not_zero(&mm->mm_users))
@@ -1142,10 +1153,9 @@ static int try_to_unuse(unsigned int type)
                                swcount = *swap_map;
                                if (!swap_count(swcount)) /* any usage ? */
                                        ;
-                               else if (mm == &init_mm) {
+                               else if (mm == &init_mm)
                                        set_start_mm = 1;
-                                       shmem = shmem_unuse(entry, page);
-                               } else
+                               else
                                        retval = unuse_mm(mm, entry, page);
 
                                if (set_start_mm && *swap_map < swcount) {
@@ -1161,13 +1171,6 @@ static int try_to_unuse(unsigned int type)
                        mmput(start_mm);
                        start_mm = new_start_mm;
                }
-               if (shmem) {
-                       /* page has already been unlocked and released */
-                       if (shmem > 0)
-                               continue;
-                       retval = shmem;
-                       break;
-               }
                if (retval) {
                        unlock_page(page);
                        page_cache_release(page);
@@ -1186,6 +1189,12 @@ static int try_to_unuse(unsigned int type)
                 * read from disk into another page.  Splitting into two
                 * pages would be incorrect if swap supported "shared
                 * private" pages, but they are handled by tmpfs files.
+                *
+                * Given how unuse_vma() targets one particular offset
+                * in an anon_vma, once the anon_vma has been determined,
+                * this splitting happens to be just what is needed to
+                * handle where KSM pages have been swapped out: re-reading
+                * is unnecessarily slow, but we can fix that later on.
                 */
                if (swap_count(*swap_map) &&
                     PageDirty(page) && PageSwapCache(page)) {
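Taken together, the try_to_unuse() hunks replace the old "use init_mm as a reminder to probe shmem" convention with an explicit dispatch on the SWAP_MAP_SHMEM marker. A condensed, self-contained sketch of the new per-entry flow (demo_* helpers are hypothetical stand-ins; locking, refcounting and the retry logic are elided):

#define DEMO_SWAP_MAP_SHMEM     0xbf

static int demo_shmem_unuse(void) { return 0; }         /* stand-in */
static int demo_unuse_one_mm(void) { return 0; }        /* stand-in: 0 = list done */
static unsigned char demo_swap_count(void) { return 0; }        /* stand-in */

static int demo_unuse_entry(void)
{
        if (demo_swap_count() == DEMO_SWAP_MAP_SHMEM)
                return demo_shmem_unuse();      /* one call covers all shmem refs */

        while (demo_swap_count() && demo_unuse_one_mm())
                ;       /* walk the mmlist until this entry's count drops */
        return 0;
}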
@@ -1251,10 +1260,11 @@ static void drain_mmlist(void)
 
 /*
  * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
- * corresponds to page offset `offset'.  Note that the type of this function
- * is sector_t, but it returns page offset into the bdev, not sector offset.
+ * corresponds to page offset for the specified swap entry.
+ * Note that the type of this function is sector_t, but it returns page offset
+ * into the bdev, not sector offset.
  */
-sector_t map_swap_page(swp_entry_t entry, struct block_device **bdev)
+static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
 {
        struct swap_info_struct *sis;
        struct swap_extent *start_se;
@@ -1282,6 +1292,16 @@ sector_t map_swap_page(swp_entry_t entry, struct block_device **bdev)
        }
 }
 
+/*
+ * Returns the page offset into bdev for the specified page's swap entry.
+ */
+sector_t map_swap_page(struct page *page, struct block_device **bdev)
+{
+       swp_entry_t entry;
+       entry.val = page_private(page);
+       return map_swap_entry(entry, bdev);
+}
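Splitting the helper keeps swp_entry_t-based callers (swapdev_block() above) on the static map_swap_entry(), while the one page-based external user gets the thin exported wrapper. The return value is still a PAGE_SIZE-granular offset, so, modelled on this generation's mm/page_io.c (an assumption about the caller), the swap I/O path converts it to 512-byte sectors itself:

/* Hypothetical condensation of the bio setup in the swap I/O path */
static sector_t demo_swap_sector(struct page *page,
                                 struct block_device **bdev)
{
        return map_swap_page(page, bdev) << (PAGE_SHIFT - 9);
}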
+
 /*
  * Free all of a swapdev's extent information
  */
@@ -2126,6 +2146,15 @@ bad_file:
        goto out;
 }
 
+/*
+ * Help swapoff by noting that swap entry belongs to shmem/tmpfs
+ * (in which case its reference count is never incremented).
+ */
+void swap_shmem_alloc(swp_entry_t entry)
+{
+       __swap_duplicate(entry, SWAP_MAP_SHMEM);
+}
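The intended caller is tmpfs when it pushes a page out to swap: rather than taking a counted reference with swap_duplicate(), shmem stamps the slot once with the SWAP_MAP_SHMEM marker. A hedged condensation of what the companion shmem.c change presumably does at writepage time:

        swp_entry_t swap = get_swap_page();

        if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
                swap_shmem_alloc(swap); /* mark SWAP_MAP_SHMEM, no refcount */
                /* ... record entry in the shmem inode, then write the page ... */
        }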
+
 /*
  * increase reference count of swap entry by 1.
  */