[PATCH] hugetlb: check p?d_present in huge_pte_offset()
[linux-2.6.git] arch/i386/mm/hugetlbpage.c
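The huge_pte_offset() shown in the file below already carries the fix named in the subject. For context, the unguarded walk it replaces would have looked roughly like the sketch here (a reconstruction for illustration, not the original hunk): calling pud_offset()/pmd_offset() on a not-present pgd/pud entry can yield a pointer into a bogus "pte", whereas the guarded version simply returns NULL.

/* Sketch of the unguarded (pre-patch) lookup -- reconstruction, for illustration only. */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd = pgd_offset(mm, addr);
        pud_t *pud = pud_offset(pgd, addr);     /* trusts *pgd to be present */
        pmd_t *pmd = pmd_offset(pud, addr);     /* trusts *pud to be present */

        return (pte_t *) pmd;                   /* may point at a bogus entry */
}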
/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

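/*
 * A huge page on i386 is mapped by a single pmd entry (a PSE mapping),
 * so the walk stops at the pmd level and the pmd slot is returned cast
 * to a pte.  If the slot still points at a stale, non-huge page table,
 * it is released via hugetlb_clean_stale_pgtable() before reuse.
 */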
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
        if (!pud)
                goto out;
        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd)
                goto out;

        pte = (pte_t *) pmd;
        if (!pte_none(*pte) && !pte_huge(*pte))
                hugetlb_clean_stale_pgtable(pte);
out:
        return pte;
}

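/*
 * Look up the pmd entry backing a huge page.  The pgd_present()/
 * pud_present() checks are the point of this patch: without them the
 * walk would apply pud_offset()/pmd_offset() to a not-present upper
 * level entry and could hand back a pointer to a bogus "pte".  A NULL
 * return means no huge page is mapped at addr.
 */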
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd = NULL;

        pgd = pgd_offset(mm, addr);
        if (pgd_present(*pgd)) {
                pud = pud_offset(pgd, addr);
                if (pud_present(*pud))
                        pmd = pmd_offset(pud, addr);
        }
        return (pte_t *) pmd;
}

/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (addr & ~HPAGE_MASK)
                return -EINVAL;
        return 0;
}

#if 0   /* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        unsigned long vpfn = address / PAGE_SIZE;
        struct page *page;
        struct vm_area_struct *vma;
        pte_t *pte;

        vma = find_vma(mm, address);
        if (!vma || !is_vm_hugetlb_page(vma))
                return ERR_PTR(-EINVAL);

        pte = huge_pte_offset(mm, address);

        /* hugetlb should be locked, and hence, prefaulted */
        WARN_ON(!pte || pte_none(*pte));

        page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];

        WARN_ON(!PageCompound(page));

        return page;
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        return NULL;
}

#else

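/*
 * The implementations below are the ones actually built.  Generic code
 * discovers huge mappings through pmd_huge(), which tests the PSE bit,
 * and resolves them with follow_huge_pmd(); follow_huge_addr() is a
 * stub that simply reports "not handled at this level".
 */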
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
        return !!(pmd_val(pmd) & _PAGE_PSE);
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        struct page *page;

        page = pte_page(*(pte_t *)pmd);
        if (page)
                page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
        return page;
}
#endif

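/*
 * Called from huge_pte_alloc() when the pmd slot still references an
 * ordinary (now stale) page table: clear the entry, drop the page-table
 * accounting and release the page so the slot can take a huge mapping.
 */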
void hugetlb_clean_stale_pgtable(pte_t *pte)
{
        pmd_t *pmd = (pmd_t *) pte;
        struct page *page;

        page = pmd_page(*pmd);
        pmd_clear(pmd);
        dec_page_state(nr_page_table_pages);
        page_cache_release(page);
}

/* x86_64 also uses this file */

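/*
 * The helpers below pick an HPAGE_SIZE-aligned address range for a new
 * hugetlb mapping: bottom-up for the legacy mmap layout, top-down when
 * the mm uses the flexible (topdown) layout.
 */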
#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;

        if (len > mm->cached_hole_size) {
                start_addr = mm->free_area_cache;
        } else {
                start_addr = TASK_UNMAPPED_BASE;
                mm->cached_hole_size = 0;
        }

full_search:
        addr = ALIGN(start_addr, HPAGE_SIZE);

        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
                /* At this point:  (!vma || addr < vma->vm_end). */
                if (TASK_SIZE - len < addr) {
                        /*
                         * Start a new search - just in case we missed
                         * some holes.
                         */
                        if (start_addr != TASK_UNMAPPED_BASE) {
                                start_addr = TASK_UNMAPPED_BASE;
                                mm->cached_hole_size = 0;
                                goto full_search;
                        }
                        return -ENOMEM;
                }
                if (!vma || addr + len <= vma->vm_start) {
                        mm->free_area_cache = addr + len;
                        return addr;
                }
                if (addr + mm->cached_hole_size < vma->vm_start)
                        mm->cached_hole_size = vma->vm_start - addr;
                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
        }
}

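/*
 * Top-down variant: search downwards from the cached free-area hint
 * (never above mm->mmap_base), remembering the largest hole seen so far
 * as a hint for the next call.  If nothing fits, fall back to the
 * bottom-up search.
 */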
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
                unsigned long addr0, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev_vma;
        unsigned long base = mm->mmap_base, addr = addr0;
        unsigned long largest_hole = mm->cached_hole_size;
        int first_time = 1;

        /* don't allow allocations above current base */
        if (mm->free_area_cache > base)
                mm->free_area_cache = base;

        if (len <= largest_hole) {
                largest_hole = 0;
                mm->free_area_cache = base;
        }
try_again:
        /* make sure it can fit in the remaining address space */
        if (mm->free_area_cache < len)
                goto fail;

        /* either no address requested or can't fit in requested address hole */
        addr = (mm->free_area_cache - len) & HPAGE_MASK;
        do {
                /*
                 * Lookup failure means no vma is above this address,
                 * i.e. return with success:
                 */
                if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
                        return addr;

                /*
                 * new region fits between prev_vma->vm_end and
                 * vma->vm_start, use it:
                 */
                if (addr + len <= vma->vm_start &&
                            (!prev_vma || (addr >= prev_vma->vm_end))) {
                        /* remember the address as a hint for next time */
                        mm->cached_hole_size = largest_hole;
                        return (mm->free_area_cache = addr);
                } else {
                        /* pull free_area_cache down to the first hole */
                        if (mm->free_area_cache == vma->vm_end) {
                                mm->free_area_cache = vma->vm_start;
                                mm->cached_hole_size = largest_hole;
                        }
                }

                /* remember the largest hole we saw so far */
                if (addr + largest_hole < vma->vm_start)
                        largest_hole = vma->vm_start - addr;

                /* try just below the current vma->vm_start */
                addr = (vma->vm_start - len) & HPAGE_MASK;
        } while (len <= vma->vm_start);

fail:
        /*
         * if hint left us with no space for the requested
         * mapping then try again:
         */
        if (first_time) {
                mm->free_area_cache = base;
                largest_hole = 0;
                first_time = 0;
                goto try_again;
        }
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
        addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
                        len, pgoff, flags);

        /*
         * Restore the topdown base:
         */
        mm->free_area_cache = base;
        mm->cached_hole_size = ~0UL;

        return addr;
}

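/*
 * Main entry point: validate length and alignment, honour an explicitly
 * requested address if the aligned range is free, and otherwise defer
 * to the bottom-up or top-down helper depending on the mm's layout.
 */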
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;

        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (len > TASK_SIZE)
                return -ENOMEM;

        if (addr) {
                addr = ALIGN(addr, HPAGE_SIZE);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
        if (mm->get_unmapped_area == arch_get_unmapped_area)
                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
}

#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/