x86: PAT: use reserve/free memtype in ioremap and iounmap
[linux-3.10.git] / arch / x86 / mm / ioremap.c
1 /*
2  * Re-map IO memory to kernel address space so that we can access it.
3  * This is needed for high PCI addresses that aren't mapped in the
4  * 640k-1MB IO memory area on PC's
5  *
6  * (C) Copyright 1995 1996 Linus Torvalds
7  */
8
9 #include <linux/bootmem.h>
10 #include <linux/init.h>
11 #include <linux/io.h>
12 #include <linux/module.h>
13 #include <linux/slab.h>
14 #include <linux/vmalloc.h>
15
16 #include <asm/cacheflush.h>
17 #include <asm/e820.h>
18 #include <asm/fixmap.h>
19 #include <asm/pgtable.h>
20 #include <asm/tlbflush.h>
21 #include <asm/pgalloc.h>
22 #include <asm/pat.h>
23
24 #ifdef CONFIG_X86_64
25
26 unsigned long __phys_addr(unsigned long x)
27 {
28         if (x >= __START_KERNEL_map)
29                 return x - __START_KERNEL_map + phys_base;
30         return x - PAGE_OFFSET;
31 }
32 EXPORT_SYMBOL(__phys_addr);
33
34 static inline int phys_addr_valid(unsigned long addr)
35 {
36         return addr < (1UL << boot_cpu_data.x86_phys_bits);
37 }
38
39 #else
40
/* Without CONFIG_X86_64 there is no physical-width check to apply. */
static inline int phys_addr_valid(unsigned long addr)
{
	return 1;
}
45
46 #endif
47
48 int page_is_ram(unsigned long pagenr)
49 {
50         unsigned long addr, end;
51         int i;
52
53         /*
54          * A special case is the first 4Kb of memory;
55          * This is a BIOS owned area, not kernel ram, but generally
56          * not listed as such in the E820 table.
57          */
58         if (pagenr == 0)
59                 return 0;
60
61         /*
62          * Second special case: Some BIOSen report the PC BIOS
63          * area (640->1Mb) as ram even though it is not.
64          */
65         if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
66                     pagenr < (BIOS_END >> PAGE_SHIFT))
67                 return 0;
68
69         for (i = 0; i < e820.nr_map; i++) {
70                 /*
71                  * Not usable memory:
72                  */
73                 if (e820.map[i].type != E820_RAM)
74                         continue;
75                 addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
76                 end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
77
78
79                 if ((pagenr >= addr) && (pagenr < end))
80                         return 1;
81         }
82         return 0;
83 }
84
85 /*
86  * Fix up the linear direct mapping of the kernel to avoid cache attribute
87  * conflicts.
88  */
89 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
90                                unsigned long prot_val)
91 {
92         unsigned long nrpages = size >> PAGE_SHIFT;
93         int err;
94
95         switch (prot_val) {
96         case _PAGE_CACHE_UC:
97         default:
98                 err = set_memory_uc(vaddr, nrpages);
99                 break;
100         case _PAGE_CACHE_WB:
101                 err = set_memory_wb(vaddr, nrpages);
102                 break;
103         }
104
105         return err;
106 }
107
108 /*
109  * Remap an arbitrary physical address space into the kernel virtual
110  * address space. Needed when the kernel wants to access high addresses
111  * directly.
112  *
113  * NOTE! We need to allow non-page-aligned mappings too: we will obviously
114  * have to convert them into an offset in a page-aligned mapping, but the
115  * caller shouldn't need to know that small detail.
116  */
117 static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
118                                unsigned long prot_val)
119 {
120         unsigned long pfn, offset, last_addr, vaddr;
121         struct vm_struct *area;
122         unsigned long new_prot_val;
123         pgprot_t prot;
124
125         /* Don't allow wraparound or zero size */
126         last_addr = phys_addr + size - 1;
127         if (!size || last_addr < phys_addr)
128                 return NULL;
129
130         if (!phys_addr_valid(phys_addr)) {
131                 printk(KERN_WARNING "ioremap: invalid physical address %lx\n",
132                        phys_addr);
133                 WARN_ON_ONCE(1);
134                 return NULL;
135         }
136
137         /*
138          * Don't remap the low PCI/ISA area, it's always mapped..
139          */
140         if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
141                 return (__force void __iomem *)phys_to_virt(phys_addr);
142
143         /*
144          * Don't allow anybody to remap normal RAM that we're using..
145          */
146         for (pfn = phys_addr >> PAGE_SHIFT;
147                                 (pfn << PAGE_SHIFT) < last_addr; pfn++) {
148
149                 int is_ram = page_is_ram(pfn);
150
151                 if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
152                         return NULL;
153                 WARN_ON_ONCE(is_ram);
154         }
155
156         /*
157          * Mappings have to be page-aligned
158          */
159         offset = phys_addr & ~PAGE_MASK;
160         phys_addr &= PAGE_MASK;
161         size = PAGE_ALIGN(last_addr+1) - phys_addr;
162
163         if (reserve_memtype(phys_addr, phys_addr + size,
164                             prot_val, &new_prot_val)) {
165                 /*
166                  * Do not fallback to certain memory types with certain
167                  * requested type:
168                  * - request is uncached, return cannot be write-back
169                  */
170                 if ((prot_val == _PAGE_CACHE_UC &&
171                      new_prot_val == _PAGE_CACHE_WB)) {
172                         free_memtype(phys_addr, phys_addr + size);
173                         return NULL;
174                 }
175                 prot_val = new_prot_val;
176         }
177
178         switch (prot_val) {
179         case _PAGE_CACHE_UC:
180         default:
181                 prot = PAGE_KERNEL_NOCACHE;
182                 break;
183         case _PAGE_CACHE_WB:
184                 prot = PAGE_KERNEL;
185                 break;
186         }
187
188         /*
189          * Ok, go for it..
190          */
191         area = get_vm_area(size, VM_IOREMAP);
192         if (!area)
193                 return NULL;
194         area->phys_addr = phys_addr;
195         vaddr = (unsigned long) area->addr;
196         if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
197                 free_memtype(phys_addr, phys_addr + size);
198                 free_vm_area(area);
199                 return NULL;
200         }
201
202         if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
203                 free_memtype(phys_addr, phys_addr + size);
204                 vunmap(area->addr);
205                 return NULL;
206         }
207
208         return (void __iomem *) (vaddr + offset);
209 }
210
211 /**
212  * ioremap_nocache     -   map bus memory into CPU space
213  * @offset:    bus address of the memory
214  * @size:      size of the resource to map
215  *
216  * ioremap_nocache performs a platform specific sequence of operations to
217  * make bus memory CPU accessible via the readb/readw/readl/writeb/
218  * writew/writel functions and the other mmio helpers. The returned
219  * address is not guaranteed to be usable directly as a virtual
220  * address.
221  *
222  * This version of ioremap ensures that the memory is marked uncachable
223  * on the CPU as well as honouring existing caching rules from things like
224  * the PCI bus. Note that there are other caches and buffers on many
225  * busses. In particular driver authors should read up on PCI writes
226  *
227  * It's useful if some control registers are in such an area and
228  * write combining or read caching is not desirable:
229  *
230  * Must be freed with iounmap.
231  */
232 void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
233 {
234         return __ioremap(phys_addr, size, _PAGE_CACHE_UC);
235 }
236 EXPORT_SYMBOL(ioremap_nocache);
237
238 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
239 {
240         return __ioremap(phys_addr, size, _PAGE_CACHE_WB);
241 }
242 EXPORT_SYMBOL(ioremap_cache);
243
/**
 * iounmap - Free a IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
	struct vm_struct *p, *o;

	/* Addresses up to high_memory are in the direct mapping and
	   were never handed out by __ioremap - nothing to do. */
	if ((void __force *)addr <= high_memory)
		return;

	/*
	 * __ioremap special-cases the PCI/ISA range by not instantiating a
	 * vm_area and by simply returning an address into the kernel mapping
	 * of ISA space.   So handle that here.
	 */
	if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
	    addr < phys_to_virt(ISA_END_ADDRESS))
		return;

	/* Round down to the page boundary __ioremap mapped (it added the
	   sub-page offset back into the pointer it returned). */
	addr = (volatile void __iomem *)
		(PAGE_MASK & (unsigned long __force)addr);

	/* Use the vm area unlocked, assuming the caller
	   ensures there isn't another iounmap for the same address
	   in parallel. Reuse of the virtual address is prevented by
	   leaving it in the global lists until we're done with it.
	   cpa takes care of the direct mappings. */
	read_lock(&vmlist_lock);
	for (p = vmlist; p; p = p->next) {
		if (p->addr == addr)
			break;
	}
	read_unlock(&vmlist_lock);

	if (!p) {
		printk(KERN_ERR "iounmap: bad address %p\n", addr);
		dump_stack();
		return;
	}

	/* Drop the PAT memtype reservation taken in __ioremap(). */
	free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));

	/* Finally remove it */
	o = remove_vm_area((void *)addr);
	BUG_ON(p != o || o == NULL);
	kfree(p);
}
EXPORT_SYMBOL(iounmap);
295
296 #ifdef CONFIG_X86_32
297
/* Set via the "early_ioremap_debug" boot parameter; enables verbose
   tracing of the boot-time ioremap machinery. */
int __initdata early_ioremap_debug;

static int __init early_ioremap_debug_setup(char *str)
{
	early_ioremap_debug = 1;

	return 0;
}
early_param("early_ioremap_debug", early_ioremap_debug_setup);

/* Non-zero once paging is fully up; switches the early fixmap helpers
   over to the regular set_fixmap()/clear_fixmap() paths. */
static __initdata int after_paging_init;
/* One page of boot-time PTEs backing the whole FIX_BTMAP range. */
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
		__section(.bss.page_aligned);
311
312 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
313 {
314         /* Don't assume we're using swapper_pg_dir at this point */
315         pgd_t *base = __va(read_cr3());
316         pgd_t *pgd = &base[pgd_index(addr)];
317         pud_t *pud = pud_offset(pgd, addr);
318         pmd_t *pmd = pmd_offset(pud, addr);
319
320         return pmd;
321 }
322
323 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
324 {
325         return &bm_pte[pte_index(addr)];
326 }
327
/*
 * Set up the boot-time fixmap page table: hook bm_pte into the pmd that
 * covers FIX_BTMAP_BEGIN so early_ioremap() can install mappings before
 * the normal paging machinery is ready.
 */
void __init early_ioremap_init(void)
{
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_init()\n");

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	memset(bm_pte, 0, sizeof(bm_pte));
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		printk(KERN_WARNING "pmd %p != %p\n",
		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
		       FIX_BTMAP_BEGIN);
	}
}
357
/*
 * Tear the boot-time fixmap pmd entry down again, handing the bm_pte
 * page back to the paravirt layer and flushing stale TLB entries.
 */
void __init early_ioremap_clear(void)
{
	pmd_t *pmd;

	if (early_ioremap_debug)
		printk(KERN_INFO "early_ioremap_clear()\n");

	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
	pmd_clear(pmd);
	paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
	__flush_tlb_all();
}
370
/*
 * Once paging is up, re-install any still-present boot-time mappings
 * through the regular set_fixmap() path and flip the early helpers
 * over to it via after_paging_init.
 */
void __init early_ioremap_reset(void)
{
	enum fixed_addresses idx;
	unsigned long addr, phys;
	pte_t *pte;

	after_paging_init = 1;
	/* fixmap indices grow downwards in address space, hence idx-- */
	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
		addr = fix_to_virt(idx);
		pte = early_ioremap_pte(addr);
		if (pte_present(*pte)) {
			phys = pte_val(*pte) & PAGE_MASK;
			set_fixmap(idx, phys);
		}
	}
}
387
/*
 * Install (or, with empty flags, remove) a single boot-time fixmap
 * entry directly in bm_pte, then flush the affected TLB entry.
 */
static void __init __early_set_fixmap(enum fixed_addresses idx,
				   unsigned long phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	pte = early_ioremap_pte(addr);
	/* Empty protection flags mean "clear the mapping". */
	if (pgprot_val(flags))
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	else
		pte_clear(NULL, addr, pte);
	__flush_tlb_one(addr);
}
405
406 static inline void __init early_set_fixmap(enum fixed_addresses idx,
407                                         unsigned long phys)
408 {
409         if (after_paging_init)
410                 set_fixmap(idx, phys);
411         else
412                 __early_set_fixmap(idx, phys, PAGE_KERNEL);
413 }
414
415 static inline void __init early_clear_fixmap(enum fixed_addresses idx)
416 {
417         if (after_paging_init)
418                 clear_fixmap(idx);
419         else
420                 __early_set_fixmap(idx, 0, __pgprot(0));
421 }
422
423
/* Nesting depth of currently-outstanding early_ioremap() mappings. */
int __initdata early_ioremap_nested;

/*
 * Late sanity check: any early_ioremap() that was never matched by an
 * early_iounmap() leaves early_ioremap_nested non-zero.
 */
static int __init check_early_ioremap_leak(void)
{
	if (!early_ioremap_nested)
		return 0;

	printk(KERN_WARNING
	       "Debug warning: early ioremap leak of %d areas detected.\n",
	       early_ioremap_nested);
	printk(KERN_WARNING
	       "please boot with early_ioremap_debug and report the dmesg.\n");
	WARN_ON(1);

	return 1;
}
late_initcall(check_early_ioremap_leak);
441
442 void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
443 {
444         unsigned long offset, last_addr;
445         unsigned int nrpages, nesting;
446         enum fixed_addresses idx0, idx;
447
448         WARN_ON(system_state != SYSTEM_BOOTING);
449
450         nesting = early_ioremap_nested;
451         if (early_ioremap_debug) {
452                 printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
453                        phys_addr, size, nesting);
454                 dump_stack();
455         }
456
457         /* Don't allow wraparound or zero size */
458         last_addr = phys_addr + size - 1;
459         if (!size || last_addr < phys_addr) {
460                 WARN_ON(1);
461                 return NULL;
462         }
463
464         if (nesting >= FIX_BTMAPS_NESTING) {
465                 WARN_ON(1);
466                 return NULL;
467         }
468         early_ioremap_nested++;
469         /*
470          * Mappings have to be page-aligned
471          */
472         offset = phys_addr & ~PAGE_MASK;
473         phys_addr &= PAGE_MASK;
474         size = PAGE_ALIGN(last_addr) - phys_addr;
475
476         /*
477          * Mappings have to fit in the FIX_BTMAP area.
478          */
479         nrpages = size >> PAGE_SHIFT;
480         if (nrpages > NR_FIX_BTMAPS) {
481                 WARN_ON(1);
482                 return NULL;
483         }
484
485         /*
486          * Ok, go for it..
487          */
488         idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
489         idx = idx0;
490         while (nrpages > 0) {
491                 early_set_fixmap(idx, phys_addr);
492                 phys_addr += PAGE_SIZE;
493                 --idx;
494                 --nrpages;
495         }
496         if (early_ioremap_debug)
497                 printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
498
499         return (void *) (offset + fix_to_virt(idx0));
500 }
501
502 void __init early_iounmap(void *addr, unsigned long size)
503 {
504         unsigned long virt_addr;
505         unsigned long offset;
506         unsigned int nrpages;
507         enum fixed_addresses idx;
508         unsigned int nesting;
509
510         nesting = --early_ioremap_nested;
511         WARN_ON(nesting < 0);
512
513         if (early_ioremap_debug) {
514                 printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
515                        size, nesting);
516                 dump_stack();
517         }
518
519         virt_addr = (unsigned long)addr;
520         if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
521                 WARN_ON(1);
522                 return;
523         }
524         offset = virt_addr & ~PAGE_MASK;
525         nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
526
527         idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
528         while (nrpages > 0) {
529                 early_clear_fixmap(idx);
530                 --idx;
531                 --nrpages;
532         }
533 }
534
/*
 * Referenced by the fixmap helpers when a non-constant or out-of-range
 * fixmap index is used.  NOTE(review): upstream normally leaves this
 * symbol undefined so misuse becomes a link error; this definition only
 * warns at runtime - confirm that is the intent here.
 */
void __this_fixmap_does_not_exist(void)
{
	WARN_ON(1);
}
539
540 #endif /* CONFIG_X86_32 */