mm: accelerate mm_populate() treatment of THP pages
Michel Lespinasse [Sat, 23 Feb 2013 00:35:56 +0000 (16:35 -0800)]
This change adds a follow_page_mask function which is equivalent to
follow_page, but with an extra page_mask argument.

follow_page_mask sets *page_mask to HPAGE_PMD_NR - 1 when it encounters
a THP page, and to 0 in other cases.

__get_user_pages() makes use of this in order to accelerate populating
THP ranges - that is, when both the pages and vmas arrays are NULL, we
don't need to iterate HPAGE_PMD_NR times to cover a single THP page (and
we also avoid taking mm->page_table_lock that many times).

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

include/linux/mm.h
mm/memory.c
mm/nommu.c

index 87b0ef2..6124f1d 100644 (file)
@@ -1629,8 +1629,17 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
                        unsigned long pfn);
 
-struct page *follow_page(struct vm_area_struct *, unsigned long address,
-                       unsigned int foll_flags);
+struct page *follow_page_mask(struct vm_area_struct *vma,
+                             unsigned long address, unsigned int foll_flags,
+                             unsigned int *page_mask);
+
+static inline struct page *follow_page(struct vm_area_struct *vma,
+               unsigned long address, unsigned int foll_flags)
+{
+       unsigned int unused_page_mask;
+       return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
+}
+
 #define FOLL_WRITE     0x01    /* check pte is writable */
 #define FOLL_TOUCH     0x02    /* mark page accessed */
 #define FOLL_GET       0x04    /* do get_page on page */
index bc929db..5d2ef12 100644 (file)
@@ -1462,10 +1462,11 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
 
 /**
- * follow_page - look up a page descriptor from a user-virtual address
+ * follow_page_mask - look up a page descriptor from a user-virtual address
  * @vma: vm_area_struct mapping @address
  * @address: virtual address to look up
  * @flags: flags modifying lookup behaviour
+ * @page_mask: on output, *page_mask is HPAGE_PMD_NR - 1 for a THP, else 0
  *
  * @flags can have FOLL_ flags set, defined in <linux/mm.h>
  *
@@ -1473,8 +1474,9 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
  * an error pointer if there is a mapping to something not represented
  * by a page descriptor (see also vm_normal_page()).
  */
-struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
-                       unsigned int flags)
+struct page *follow_page_mask(struct vm_area_struct *vma,
+                             unsigned long address, unsigned int flags,
+                             unsigned int *page_mask)
 {
        pgd_t *pgd;
        pud_t *pud;
@@ -1484,6 +1486,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
        struct page *page;
        struct mm_struct *mm = vma->vm_mm;
 
+       *page_mask = 0;
+
        page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
        if (!IS_ERR(page)) {
                BUG_ON(flags & FOLL_GET);
@@ -1530,6 +1534,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
                                page = follow_trans_huge_pmd(vma, address,
                                                             pmd, flags);
                                spin_unlock(&mm->page_table_lock);
+                               *page_mask = HPAGE_PMD_NR - 1;
                                goto out;
                        }
                } else
@@ -1684,6 +1689,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 {
        long i;
        unsigned long vm_flags;
+       unsigned int page_mask;
 
        if (!nr_pages)
                return 0;
@@ -1761,6 +1767,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                get_page(page);
                        }
                        pte_unmap(pte);
+                       page_mask = 0;
                        goto next_page;
                }
 
@@ -1778,6 +1785,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                do {
                        struct page *page;
                        unsigned int foll_flags = gup_flags;
+                       unsigned int page_increm;
 
                        /*
                         * If we have a pending SIGKILL, don't keep faulting
@@ -1787,7 +1795,8 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                return i ? i : -ERESTARTSYS;
 
                        cond_resched();
-                       while (!(page = follow_page(vma, start, foll_flags))) {
+                       while (!(page = follow_page_mask(vma, start,
+                                               foll_flags, &page_mask))) {
                                int ret;
                                unsigned int fault_flags = 0;
 
@@ -1861,13 +1870,19 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
                                flush_anon_page(vma, page, start);
                                flush_dcache_page(page);
+                               page_mask = 0;
                        }
 next_page:
-                       if (vmas)
+                       if (vmas) {
                                vmas[i] = vma;
-                       i++;
-                       start += PAGE_SIZE;
-                       nr_pages--;
+                               page_mask = 0;
+                       }
+                       page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+                       if (page_increm > nr_pages)
+                               page_increm = nr_pages;
+                       i += page_increm;
+                       start += page_increm * PAGE_SIZE;
+                       nr_pages -= page_increm;
                } while (nr_pages && start < vma->vm_end);
        } while (nr_pages);
        return i;
index 6ab7066..da0d210 100644 (file)
@@ -1819,9 +1819,11 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
        return ret;
 }
 
-struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
-                       unsigned int foll_flags)
+struct page *follow_page_mask(struct vm_area_struct *vma,
+                             unsigned long address, unsigned int flags,
+                             unsigned int *page_mask)
 {
+       *page_mask = 0;
        return NULL;
 }