thp: split_huge_page_mm/vma
[linux-3.10.git] / mm / pagewalk.c
1 #include <linux/mm.h>
2 #include <linux/highmem.h>
3 #include <linux/sched.h>
4 #include <linux/hugetlb.h>
5
6 static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
7                           struct mm_walk *walk)
8 {
9         pte_t *pte;
10         int err = 0;
11
12         pte = pte_offset_map(pmd, addr);
13         for (;;) {
14                 err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
15                 if (err)
16                        break;
17                 addr += PAGE_SIZE;
18                 if (addr == end)
19                         break;
20                 pte++;
21         }
22
23         pte_unmap(pte);
24         return err;
25 }
26
27 static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
28                           struct mm_walk *walk)
29 {
30         pmd_t *pmd;
31         unsigned long next;
32         int err = 0;
33
34         pmd = pmd_offset(pud, addr);
35         do {
36                 next = pmd_addr_end(addr, end);
37                 split_huge_page_pmd(walk->mm, pmd);
38                 if (pmd_none_or_clear_bad(pmd)) {
39                         if (walk->pte_hole)
40                                 err = walk->pte_hole(addr, next, walk);
41                         if (err)
42                                 break;
43                         continue;
44                 }
45                 if (walk->pmd_entry)
46                         err = walk->pmd_entry(pmd, addr, next, walk);
47                 if (!err && walk->pte_entry)
48                         err = walk_pte_range(pmd, addr, next, walk);
49                 if (err)
50                         break;
51         } while (pmd++, addr = next, addr != end);
52
53         return err;
54 }
55
56 static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
57                           struct mm_walk *walk)
58 {
59         pud_t *pud;
60         unsigned long next;
61         int err = 0;
62
63         pud = pud_offset(pgd, addr);
64         do {
65                 next = pud_addr_end(addr, end);
66                 if (pud_none_or_clear_bad(pud)) {
67                         if (walk->pte_hole)
68                                 err = walk->pte_hole(addr, next, walk);
69                         if (err)
70                                 break;
71                         continue;
72                 }
73                 if (walk->pud_entry)
74                         err = walk->pud_entry(pud, addr, next, walk);
75                 if (!err && (walk->pmd_entry || walk->pte_entry))
76                         err = walk_pmd_range(pud, addr, next, walk);
77                 if (err)
78                         break;
79         } while (pud++, addr = next, addr != end);
80
81         return err;
82 }
83
84 #ifdef CONFIG_HUGETLB_PAGE
/*
 * Return the end of the huge-page-sized region containing @addr for
 * hstate @h, clamped so that it never exceeds @end.
 */
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
                                       unsigned long end)
{
        unsigned long region_start = addr & huge_page_mask(h);
        unsigned long region_end = region_start + huge_page_size(h);

        if (region_end < end)
                return region_end;
        return end;
}
91
92 static int walk_hugetlb_range(struct vm_area_struct *vma,
93                               unsigned long addr, unsigned long end,
94                               struct mm_walk *walk)
95 {
96         struct hstate *h = hstate_vma(vma);
97         unsigned long next;
98         unsigned long hmask = huge_page_mask(h);
99         pte_t *pte;
100         int err = 0;
101
102         do {
103                 next = hugetlb_entry_end(h, addr, end);
104                 pte = huge_pte_offset(walk->mm, addr & hmask);
105                 if (pte && walk->hugetlb_entry)
106                         err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
107                 if (err)
108                         return err;
109         } while (addr = next, addr != end);
110
111         return 0;
112 }
113 #endif
114
115 /**
116  * walk_page_range - walk a memory map's page tables with a callback
117  * @mm: memory map to walk
118  * @addr: starting address
119  * @end: ending address
120  * @walk: set of callbacks to invoke for each level of the tree
121  *
122  * Recursively walk the page table for the memory area in a VMA,
123  * calling supplied callbacks. Callbacks are called in-order (first
124  * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
125  * etc.). If lower-level callbacks are omitted, walking depth is reduced.
126  *
127  * Each callback receives an entry pointer and the start and end of the
128  * associated range, and a copy of the original mm_walk for access to
129  * the ->private or ->mm fields.
130  *
131  * No locks are taken, but the bottom level iterator will map PTE
132  * directories from highmem if necessary.
133  *
134  * If any callback returns a non-zero value, the walk is aborted and
135  * the return value is propagated back to the caller. Otherwise 0 is returned.
136  */
137 int walk_page_range(unsigned long addr, unsigned long end,
138                     struct mm_walk *walk)
139 {
140         pgd_t *pgd;
141         unsigned long next;
142         int err = 0;
143
144         if (addr >= end)
145                 return err;
146
147         if (!walk->mm)
148                 return -EINVAL;
149
150         pgd = pgd_offset(walk->mm, addr);
151         do {
152                 struct vm_area_struct *uninitialized_var(vma);
153
154                 next = pgd_addr_end(addr, end);
155
156 #ifdef CONFIG_HUGETLB_PAGE
157                 /*
158                  * handle hugetlb vma individually because pagetable walk for
159                  * the hugetlb page is dependent on the architecture and
160                  * we can't handled it in the same manner as non-huge pages.
161                  */
162                 vma = find_vma(walk->mm, addr);
163                 if (vma && is_vm_hugetlb_page(vma)) {
164                         if (vma->vm_end < next)
165                                 next = vma->vm_end;
166                         /*
167                          * Hugepage is very tightly coupled with vma, so
168                          * walk through hugetlb entries within a given vma.
169                          */
170                         err = walk_hugetlb_range(vma, addr, next, walk);
171                         if (err)
172                                 break;
173                         pgd = pgd_offset(walk->mm, next);
174                         continue;
175                 }
176 #endif
177                 if (pgd_none_or_clear_bad(pgd)) {
178                         if (walk->pte_hole)
179                                 err = walk->pte_hole(addr, next, walk);
180                         if (err)
181                                 break;
182                         pgd++;
183                         continue;
184                 }
185                 if (walk->pgd_entry)
186                         err = walk->pgd_entry(pgd, addr, next, walk);
187                 if (!err &&
188                     (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
189                         err = walk_pud_range(pgd, addr, next, walk);
190                 if (err)
191                         break;
192                 pgd++;
193         } while (addr = next, addr != end);
194
195         return err;
196 }