thp: pte alloc trans splitting
Andrea Arcangeli [Thu, 13 Jan 2011 23:46:43 +0000 (15:46 -0800)]
pte alloc routines must wait for split_huge_page if the pmd is not present
and not null (i.e.  pmd_trans_splitting).  The additional branches are
optimized away at compile time by pmd_trans_splitting if the config option
is off.  However we must pass the vma down in order to know the anon_vma
lock to wait for.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

arch/arm/mm/pgd.c
arch/ia64/mm/hugetlbpage.c
arch/sh/mm/hugetlbpage.c
arch/sparc/mm/generic_32.c
arch/sparc/mm/generic_64.c
arch/sparc/mm/hugetlbpage.c
arch/um/kernel/skas/mmu.c
arch/x86/kernel/tboot.c
include/linux/mm.h
mm/memory.c
mm/mremap.c

index 93292a1..709244c 100644 (file)
@@ -50,7 +50,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
                if (!new_pmd)
                        goto no_pmd;
 
-               new_pte = pte_alloc_map(mm, new_pmd, 0);
+               new_pte = pte_alloc_map(mm, NULL, new_pmd, 0);
                if (!new_pte)
                        goto no_pte;
 
index 1841ee7..5ca674b 100644 (file)
@@ -38,7 +38,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
        if (pud) {
                pmd = pmd_alloc(mm, pud, taddr);
                if (pmd)
-                       pte = pte_alloc_map(mm, pmd, taddr);
+                       pte = pte_alloc_map(mm, NULL, pmd, taddr);
        }
        return pte;
 }
index 9163db3..d776234 100644 (file)
@@ -35,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                if (pud) {
                        pmd = pmd_alloc(mm, pud, addr);
                        if (pmd)
-                               pte = pte_alloc_map(mm, pmd, addr);
+                               pte = pte_alloc_map(mm, NULL, pmd, addr);
                }
        }
 
index 5edcac1..e6067b7 100644 (file)
@@ -50,7 +50,7 @@ static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned
                end = PGDIR_SIZE;
        offset -= address;
        do {
-               pte_t * pte = pte_alloc_map(mm, pmd, address);
+               pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
                if (!pte)
                        return -ENOMEM;
                io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
index 04f2bf4..3cb00df 100644 (file)
@@ -92,7 +92,7 @@ static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned
                end = PGDIR_SIZE;
        offset -= address;
        do {
-               pte_t * pte = pte_alloc_map(mm, pmd, address);
+               pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
                if (!pte)
                        return -ENOMEM;
                io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
index 5fdddf1..f4e9764 100644 (file)
@@ -214,7 +214,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
        if (pud) {
                pmd = pmd_alloc(mm, pud, addr);
                if (pmd)
-                       pte = pte_alloc_map(mm, pmd, addr);
+                       pte = pte_alloc_map(mm, NULL, pmd, addr);
        }
        return pte;
 }
index 3d099f9..1aee587 100644 (file)
@@ -31,7 +31,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
        if (!pmd)
                goto out_pmd;
 
-       pte = pte_alloc_map(mm, pmd, proc);
+       pte = pte_alloc_map(mm, NULL, pmd, proc);
        if (!pte)
                goto out_pte;
 
index c2f1b26..998e972 100644 (file)
@@ -133,7 +133,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
        pmd = pmd_alloc(&tboot_mm, pud, vaddr);
        if (!pmd)
                return -1;
-       pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
+       pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
        if (!pte)
                return -1;
        set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
index 6bef67d..14ddd98 100644 (file)
@@ -1131,7 +1131,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+               pmd_t *pmd, unsigned long address);
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 
 /*
@@ -1200,16 +1201,18 @@ static inline void pgtable_page_dtor(struct page *page)
        pte_unmap(pte);                                 \
 } while (0)
 
-#define pte_alloc_map(mm, pmd, address)                        \
-       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
-               NULL: pte_offset_map(pmd, address))
+#define pte_alloc_map(mm, vma, pmd, address)                           \
+       ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma,    \
+                                                       pmd, address))? \
+        NULL: pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)     \
-       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+       ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL,   \
+                                                       pmd, address))? \
                NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)                 \
-       ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+       ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
                NULL: pte_offset_kernel(pmd, address))
 
 extern void free_area_init(unsigned long * zones_size);
index bdf1936..567bca8 100644 (file)
@@ -394,9 +394,11 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
        }
 }
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+               pmd_t *pmd, unsigned long address)
 {
        pgtable_t new = pte_alloc_one(mm, address);
+       int wait_split_huge_page;
        if (!new)
                return -ENOMEM;
 
@@ -416,14 +418,18 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
        smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
        spin_lock(&mm->page_table_lock);
-       if (!pmd_present(*pmd)) {       /* Has another populated it ? */
+       wait_split_huge_page = 0;
+       if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                mm->nr_ptes++;
                pmd_populate(mm, pmd, new);
                new = NULL;
-       }
+       } else if (unlikely(pmd_trans_splitting(*pmd)))
+               wait_split_huge_page = 1;
        spin_unlock(&mm->page_table_lock);
        if (new)
                pte_free(mm, new);
+       if (wait_split_huge_page)
+               wait_split_huge_page(vma->anon_vma, pmd);
        return 0;
 }
 
@@ -436,10 +442,11 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
        smp_wmb(); /* See comment in __pte_alloc */
 
        spin_lock(&init_mm.page_table_lock);
-       if (!pmd_present(*pmd)) {       /* Has another populated it ? */
+       if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                pmd_populate_kernel(&init_mm, pmd, new);
                new = NULL;
-       }
+       } else
+               VM_BUG_ON(pmd_trans_splitting(*pmd));
        spin_unlock(&init_mm.page_table_lock);
        if (new)
                pte_free_kernel(&init_mm, new);
@@ -3253,7 +3260,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        pmd = pmd_alloc(mm, pud, address);
        if (!pmd)
                return VM_FAULT_OOM;
-       pte = pte_alloc_map(mm, pmd, address);
+       pte = pte_alloc_map(mm, vma, pmd, address);
        if (!pte)
                return VM_FAULT_OOM;
 
index 563fbdd..b09eefa 100644 (file)
@@ -47,7 +47,8 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
        return pmd;
 }
 
-static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
+static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+                           unsigned long addr)
 {
        pgd_t *pgd;
        pud_t *pud;
@@ -62,7 +63,8 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
        if (!pmd)
                return NULL;
 
-       if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr))
+       VM_BUG_ON(pmd_trans_huge(*pmd));
+       if (pmd_none(*pmd) && __pte_alloc(mm, vma, pmd, addr))
                return NULL;
 
        return pmd;
@@ -147,7 +149,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                old_pmd = get_old_pmd(vma->vm_mm, old_addr);
                if (!old_pmd)
                        continue;
-               new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
+               new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
                if (!new_pmd)
                        break;
                next = (new_addr + PMD_SIZE) & PMD_MASK;