mm: extend KSM refcounts to the anon_vma root
Rik van Riel [Tue, 10 Aug 2010 00:18:41 +0000 (17:18 -0700)]
KSM reference counts can cause an anon_vma to exist after the processe it
belongs to have already exited.  Because the anon_vma lock now lives in
the root anon_vma, we need to ensure that the root anon_vma stays around
until after all the "child" anon_vmas have been freed.

The obvious way to do this is to have a "child" anon_vma take a reference
to the root in anon_vma_fork.  When the anon_vma is freed at munmap or
process exit, we drop the refcount in anon_vma_unlink and possibly free
the root anon_vma.

The KSM anon_vma reference count function also needs to be modified to
deal with the possibility of freeing 2 levels of anon_vma.  The easiest
way to do this is to break out the KSM magic and make it generic.

When compiling without CONFIG_KSM, this code is compiled out.

Signed-off-by: Rik van Riel <riel@redhat.com>
Tested-by: Larry Woodman <lwoodman@redhat.com>
Acked-by: Larry Woodman <lwoodman@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Tested-by: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

include/linux/rmap.h
mm/ksm.c
mm/migrate.c
mm/rmap.c

index af43cb9..dc9b3c0 100644 (file)
@@ -81,6 +81,13 @@ static inline int anonvma_external_refcount(struct anon_vma *anon_vma)
 {
        return atomic_read(&anon_vma->external_refcount);
 }
+
+static inline void get_anon_vma(struct anon_vma *anon_vma)
+{
+       atomic_inc(&anon_vma->external_refcount);
+}
+
+void drop_anon_vma(struct anon_vma *);
 #else
 static inline void anonvma_external_refcount_init(struct anon_vma *anon_vma)
 {
@@ -90,6 +97,14 @@ static inline int anonvma_external_refcount(struct anon_vma *anon_vma)
 {
        return 0;
 }
+
+static inline void get_anon_vma(struct anon_vma *anon_vma)
+{
+}
+
+static inline void drop_anon_vma(struct anon_vma *anon_vma)
+{
+}
 #endif /* CONFIG_KSM */
 
 static inline struct anon_vma *page_anon_vma(struct page *page)
index da6037c..9f2acc9 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item,
                          struct anon_vma *anon_vma)
 {
        rmap_item->anon_vma = anon_vma;
-       atomic_inc(&anon_vma->external_refcount);
+       get_anon_vma(anon_vma);
 }
 
-static void drop_anon_vma(struct rmap_item *rmap_item)
+static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
 {
        struct anon_vma *anon_vma = rmap_item->anon_vma;
 
-       if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
-               int empty = list_empty(&anon_vma->head);
-               anon_vma_unlock(anon_vma);
-               if (empty)
-                       anon_vma_free(anon_vma);
-       }
+       drop_anon_vma(anon_vma);
 }
 
 /*
@@ -415,7 +410,7 @@ static void break_cow(struct rmap_item *rmap_item)
         * It is not an accident that whenever we want to break COW
         * to undo, we also need to drop a reference to the anon_vma.
         */
-       drop_anon_vma(rmap_item);
+       ksm_drop_anon_vma(rmap_item);
 
        down_read(&mm->mmap_sem);
        if (ksm_test_exit(mm))
@@ -470,7 +465,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
                        ksm_pages_sharing--;
                else
                        ksm_pages_shared--;
-               drop_anon_vma(rmap_item);
+               ksm_drop_anon_vma(rmap_item);
                rmap_item->address &= PAGE_MASK;
                cond_resched();
        }
@@ -558,7 +553,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
                else
                        ksm_pages_shared--;
 
-               drop_anon_vma(rmap_item);
+               ksm_drop_anon_vma(rmap_item);
                rmap_item->address &= PAGE_MASK;
 
        } else if (rmap_item->address & UNSTABLE_FLAG) {
index 5208fa1..38e7cad 100644 (file)
@@ -639,7 +639,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                         * exist when the page is remapped later
                         */
                        anon_vma = page_anon_vma(page);
-                       atomic_inc(&anon_vma->external_refcount);
+                       get_anon_vma(anon_vma);
                }
        }
 
@@ -682,12 +682,8 @@ skip_unmap:
 rcu_unlock:
 
        /* Drop an anon_vma reference if we took one */
-       if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
-               int empty = list_empty(&anon_vma->head);
-               anon_vma_unlock(anon_vma);
-               if (empty)
-                       anon_vma_free(anon_vma);
-       }
+       if (anon_vma)
+               drop_anon_vma(anon_vma);
 
        if (rcu_locked)
                rcu_read_unlock();
index caa48b2..07e9814 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -235,6 +235,12 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
         * lock any of the anon_vmas in this anon_vma tree.
         */
        anon_vma->root = pvma->anon_vma->root;
+       /*
+        * With KSM refcounts, an anon_vma can stay around longer than the
+        * process it belongs to.  The root anon_vma needs to be pinned
+        * until this anon_vma is freed, because the lock lives in the root.
+        */
+       get_anon_vma(anon_vma->root);
        /* Mark this anon_vma as the one where our new (COWed) pages go. */
        vma->anon_vma = anon_vma;
        anon_vma_chain_link(vma, avc, anon_vma);
@@ -264,8 +270,12 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
        empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
        anon_vma_unlock(anon_vma);
 
-       if (empty)
+       if (empty) {
+               /* We no longer need the root anon_vma */
+               if (anon_vma->root != anon_vma)
+                       drop_anon_vma(anon_vma->root);
                anon_vma_free(anon_vma);
+       }
 }
 
 void unlink_anon_vmas(struct vm_area_struct *vma)
@@ -1382,6 +1392,40 @@ int try_to_munlock(struct page *page)
                return try_to_unmap_file(page, TTU_MUNLOCK);
 }
 
+#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
+/*
+ * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
+ * if necessary.  Be careful to do all the tests under the lock.  Once
+ * we know we are the last user, nobody else can get a reference and we
+ * can do the freeing without the lock.
+ */
+void drop_anon_vma(struct anon_vma *anon_vma)
+{
+       if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
+               struct anon_vma *root = anon_vma->root;
+               int empty = list_empty(&anon_vma->head);
+               int last_root_user = 0;
+               int root_empty = 0;
+
+               /*
+                * The refcount on a non-root anon_vma got dropped.  Drop
+                * the refcount on the root and check if we need to free it.
+                */
+               if (empty && anon_vma != root) {
+                       last_root_user = atomic_dec_and_test(&root->external_refcount);
+                       root_empty = list_empty(&root->head);
+               }
+               anon_vma_unlock(anon_vma);
+
+               if (empty) {
+                       anon_vma_free(anon_vma);
+                       if (root_empty && last_root_user)
+                               anon_vma_free(root);
+               }
+       }
+}
+#endif
+
 #ifdef CONFIG_MIGRATION
 /*
  * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():