/*
 * [PATCH] hugetlb: move stale pte check into huge_pte_alloc()
 * (from linux-2.6.git: arch/i386/mm/hugetlbpage.c)
 */
1 /*
2  * IA-32 Huge TLB Page Support for Kernel.
3  *
4  * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
5  */
6
7 #include <linux/config.h>
8 #include <linux/init.h>
9 #include <linux/fs.h>
10 #include <linux/mm.h>
11 #include <linux/hugetlb.h>
12 #include <linux/pagemap.h>
13 #include <linux/smp_lock.h>
14 #include <linux/slab.h>
15 #include <linux/err.h>
16 #include <linux/sysctl.h>
17 #include <asm/mman.h>
18 #include <asm/tlb.h>
19 #include <asm/tlbflush.h>
20
21 pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
22 {
23         pgd_t *pgd;
24         pud_t *pud;
25         pmd_t *pmd;
26         pte_t *pte = NULL;
27
28         pgd = pgd_offset(mm, addr);
29         pud = pud_alloc(mm, pgd, addr);
30         pmd = pmd_alloc(mm, pud, addr);
31
32         if (!pmd)
33                 goto out;
34
35         pte = (pte_t *) pmd;
36         if (!pte_none(*pte) && !pte_huge(*pte))
37                 hugetlb_clean_stale_pgtable(pte);
38 out:
39         return pte;
40 }
41
42 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
43 {
44         pgd_t *pgd;
45         pud_t *pud;
46         pmd_t *pmd = NULL;
47
48         pgd = pgd_offset(mm, addr);
49         pud = pud_offset(pgd, addr);
50         pmd = pmd_offset(pud, addr);
51         return (pte_t *) pmd;
52 }
53
54 /*
55  * This function checks for proper alignment of input addr and len parameters.
56  */
57 int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
58 {
59         if (len & ~HPAGE_MASK)
60                 return -EINVAL;
61         if (addr & ~HPAGE_MASK)
62                 return -EINVAL;
63         return 0;
64 }
65
66 #if 0   /* This is just for testing */
67 struct page *
68 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
69 {
70         unsigned long start = address;
71         int length = 1;
72         int nr;
73         struct page *page;
74         struct vm_area_struct *vma;
75
76         vma = find_vma(mm, addr);
77         if (!vma || !is_vm_hugetlb_page(vma))
78                 return ERR_PTR(-EINVAL);
79
80         pte = huge_pte_offset(mm, address);
81
82         /* hugetlb should be locked, and hence, prefaulted */
83         WARN_ON(!pte || pte_none(*pte));
84
85         page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
86
87         WARN_ON(!PageCompound(page));
88
89         return page;
90 }
91
92 int pmd_huge(pmd_t pmd)
93 {
94         return 0;
95 }
96
97 struct page *
98 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
99                 pmd_t *pmd, int write)
100 {
101         return NULL;
102 }
103
104 #else
105
106 struct page *
107 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
108 {
109         return ERR_PTR(-EINVAL);
110 }
111
112 int pmd_huge(pmd_t pmd)
113 {
114         return !!(pmd_val(pmd) & _PAGE_PSE);
115 }
116
117 struct page *
118 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
119                 pmd_t *pmd, int write)
120 {
121         struct page *page;
122
123         page = pte_page(*(pte_t *)pmd);
124         if (page)
125                 page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
126         return page;
127 }
128 #endif
129
130 void hugetlb_clean_stale_pgtable(pte_t *pte)
131 {
132         pmd_t *pmd = (pmd_t *) pte;
133         struct page *page;
134
135         page = pmd_page(*pmd);
136         pmd_clear(pmd);
137         dec_page_state(nr_page_table_pages);
138         page_cache_release(page);
139 }
140
141 /* x86_64 also uses this file */
142
143 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
/*
 * Bottom-up search for a free, huge-page-aligned region of @len bytes.
 *
 * Starts from mm->free_area_cache when the largest known hole cannot
 * fit the request, otherwise rescans from TASK_UNMAPPED_BASE to find
 * that hole.  Walks the VMA list upward looking for a gap of at least
 * @len, caching the largest hole seen so small future requests can
 * skip ahead.
 *
 * Returns the start address on success, -ENOMEM when the address space
 * is exhausted.  @file, @pgoff and @flags are unused here but keep the
 * get_unmapped_area() signature.
 */
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > mm->cached_hole_size) {
		/* the cached hole can't fit us; resume where we left off */
		start_addr = mm->free_area_cache;
	} else {
		/* the cached hole might fit; rescan from the bottom */
		start_addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	addr = ALIGN(start_addr, HPAGE_SIZE);

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/* gap is big enough; remember where to resume */
			mm->free_area_cache = addr + len;
			return addr;
		}
		/* remember the largest hole we have seen so far */
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
	}
}
185
/*
 * Top-down search for a free, huge-page-aligned region of @len bytes,
 * working downward from mm->mmap_base.
 *
 * First pass starts from mm->free_area_cache (clamped to the base);
 * on failure the search restarts once from the base itself, and if
 * that also fails it falls back to the bottom-up allocator (large
 * stack limits can leave no room below mmap_base).  The largest hole
 * seen is cached in mm->cached_hole_size for future requests.
 *
 * Returns the start address on success, or the bottom-up fallback's
 * result (possibly -ENOMEM).
 */
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
		unsigned long addr0, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev_vma;
	unsigned long base = mm->mmap_base, addr = addr0;
	unsigned long largest_hole = mm->cached_hole_size;
	int first_time = 1;

	/* don't allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	if (len <= largest_hole) {
		/* cached hole might fit; rescan from the top */
		largest_hole = 0;
		mm->free_area_cache  = base;
	}
try_again:
	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or cant fit in requested address hole */
	addr = (mm->free_area_cache - len) & HPAGE_MASK;
	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
		if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
			return addr;

		/*
		 * new region fits between prev_vma->vm_end and
		 * vma->vm_start, use it:
		 */
		if (addr + len <= vma->vm_start &&
		            (!prev_vma || (addr >= prev_vma->vm_end))) {
			/* remember the address as a hint for next time */
			mm->cached_hole_size = largest_hole;
			return (mm->free_area_cache = addr);
		} else {
			/* pull free_area_cache down to the first hole */
			if (mm->free_area_cache == vma->vm_end) {
				mm->free_area_cache = vma->vm_start;
				mm->cached_hole_size = largest_hole;
			}
		}

		/* remember the largest hole we saw so far */
		if (addr + largest_hole < vma->vm_start)
			largest_hole = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = (vma->vm_start - len) & HPAGE_MASK;
	} while (len <= vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (first_time) {
		mm->free_area_cache = base;
		largest_hole = 0;
		first_time = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
			len, pgoff, flags);

	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;
	mm->cached_hole_size = ~0UL;

	return addr;
}
274
275 unsigned long
276 hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
277                 unsigned long len, unsigned long pgoff, unsigned long flags)
278 {
279         struct mm_struct *mm = current->mm;
280         struct vm_area_struct *vma;
281
282         if (len & ~HPAGE_MASK)
283                 return -EINVAL;
284         if (len > TASK_SIZE)
285                 return -ENOMEM;
286
287         if (addr) {
288                 addr = ALIGN(addr, HPAGE_SIZE);
289                 vma = find_vma(mm, addr);
290                 if (TASK_SIZE - len >= addr &&
291                     (!vma || addr + len <= vma->vm_start))
292                         return addr;
293         }
294         if (mm->get_unmapped_area == arch_get_unmapped_area)
295                 return hugetlb_get_unmapped_area_bottomup(file, addr, len,
296                                 pgoff, flags);
297         else
298                 return hugetlb_get_unmapped_area_topdown(file, addr, len,
299                                 pgoff, flags);
300 }
301
302 #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
303