Merge branch 'linus' into x86/bootmem
[linux-2.6.git] / arch / x86 / mm / init.c
index 92d2108..b8054e0 100644 (file)
@@ -1,5 +1,8 @@
+#include <linux/gfp.h>
+#include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
+#include <linux/memblock.h>
 
 #include <asm/cacheflush.h>
 #include <asm/e820.h>
@@ -11,6 +14,7 @@
 #include <asm/system.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
+#include <asm/proto.h>
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
@@ -26,73 +30,11 @@ int direct_gbpages
 #endif
 ;
 
-int nx_enabled;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-       if (!str)
-               return -EINVAL;
-       if (!strncmp(str, "on", 2)) {
-               __supported_pte_mask |= _PAGE_NX;
-               disable_nx = 0;
-       } else if (!strncmp(str, "off", 3)) {
-               disable_nx = 1;
-               __supported_pte_mask &= ~_PAGE_NX;
-       }
-       return 0;
-}
-early_param("noexec", noexec_setup);
-#endif
-
-#ifdef CONFIG_X86_PAE
-static void __init set_nx(void)
-{
-       unsigned int v[4], l, h;
-
-       if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-               cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-               if ((v[3] & (1 << 20)) && !disable_nx) {
-                       rdmsr(MSR_EFER, l, h);
-                       l |= EFER_NX;
-                       wrmsr(MSR_EFER, l, h);
-                       nx_enabled = 1;
-                       __supported_pte_mask |= _PAGE_NX;
-               }
-       }
-}
-#else
-static inline void set_nx(void)
-{
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __cpuinit check_efer(void)
-{
-       unsigned long efer;
-
-       rdmsrl(MSR_EFER, efer);
-       if (!(efer & EFER_NX) || disable_nx)
-               __supported_pte_mask &= ~_PAGE_NX;
-}
-#endif
-
 static void __init find_early_table_space(unsigned long end, int use_pse,
                                          int use_gbpages)
 {
-       unsigned long puds, pmds, ptes, tables, start;
+       unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
+       phys_addr_t base;
 
        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
@@ -123,24 +65,15 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 #ifdef CONFIG_X86_32
        /* for fixmap */
        tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-#endif
 
-       /*
-        * RED-PEN putting page tables only on node 0 could
-        * cause a hotspot and fill up ZONE_DMA. The page tables
-        * need roughly 0.5KB per GB.
-        */
-#ifdef CONFIG_X86_32
-       start = 0x7000;
-#else
-       start = 0x8000;
+       good_end = max_pfn_mapped << PAGE_SHIFT;
 #endif
-       e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
-                                       tables, PAGE_SIZE);
-       if (e820_table_start == -1UL)
+
+       base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
+       if (base == MEMBLOCK_ERROR)
                panic("Cannot find space for the kernel page tables");
 
-       e820_table_start >>= PAGE_SHIFT;
+       e820_table_start = base >> PAGE_SHIFT;
        e820_table_end = e820_table_start;
        e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
 
@@ -176,20 +109,6 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
        return nr_range;
 }
 
-#ifdef CONFIG_X86_64
-static void __init init_gbpages(void)
-{
-       if (direct_gbpages && cpu_has_gbpages)
-               printk(KERN_INFO "Using GB pages for direct mapping\n");
-       else
-               direct_gbpages = 0;
-}
-#else
-static inline void init_gbpages(void)
-{
-}
-#endif
-
 /*
  * Setup the direct mapping of the physical memory at PAGE_OFFSET.
  * This runs before bootmem is initialized and gets pages directly from
@@ -209,10 +128,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 
        printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
 
-       if (!after_bootmem)
-               init_gbpages();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
        /*
         * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
         * This will simplify cpa(), which otherwise needs to support splitting
@@ -224,10 +140,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
        use_gbpages = direct_gbpages;
 #endif
 
-       set_nx();
-       if (nx_enabled)
-               printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-
        /* Enable PSE if available */
        if (cpu_has_pse)
                set_in_cr4(X86_CR4_PSE);
@@ -348,16 +260,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
        if (!after_bootmem)
                find_early_table_space(end, use_pse, use_gbpages);
 
-#ifdef CONFIG_X86_32
-       for (i = 0; i < nr_range; i++)
-               kernel_physical_mapping_init(mr[i].start, mr[i].end,
-                                            mr[i].page_size_mask);
-       ret = end;
-#else /* CONFIG_X86_64 */
        for (i = 0; i < nr_range; i++)
                ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
                                                   mr[i].page_size_mask);
-#endif
 
 #ifdef CONFIG_X86_32
        early_ioremap_page_table_range_init();
@@ -365,29 +270,10 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
        load_cr3(swapper_pg_dir);
 #endif
 
-#ifdef CONFIG_X86_64
-       if (!after_bootmem && !start) {
-               pud_t *pud;
-               pmd_t *pmd;
-
-               mmu_cr4_features = read_cr4();
-
-               /*
-                * _brk_end cannot change anymore, but it and _end may be
-                * located on different 2M pages. cleanup_highmap(), however,
-                * can only consider _end when it runs, so destroy any
-                * mappings beyond _brk_end here.
-                */
-               pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
-               pmd = pmd_offset(pud, _brk_end - 1);
-               while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
-                       pmd_clear(pmd);
-       }
-#endif
        __flush_tlb_all();
 
        if (!after_bootmem && e820_table_end > e820_table_start)
-               reserve_early(e820_table_start << PAGE_SHIFT,
+               memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
                                 e820_table_end << PAGE_SHIFT, "PGTABLE");
 
        if (!after_bootmem)
@@ -420,11 +306,23 @@ int devmem_is_allowed(unsigned long pagenr)
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
-       unsigned long addr = begin;
+       unsigned long addr;
+       unsigned long begin_aligned, end_aligned;
+
+       /* Make sure boundaries are page aligned */
+       begin_aligned = PAGE_ALIGN(begin);
+       end_aligned   = end & PAGE_MASK;
+
+       if (WARN_ON(begin_aligned != begin || end_aligned != end)) {
+               begin = begin_aligned;
+               end   = end_aligned;
+       }
 
-       if (addr >= end)
+       if (begin >= end)
                return;
 
+       addr = begin;
+
        /*
         * If debugging page accesses then do not free this memory but
         * mark them not present - any buggy init-section access will
@@ -432,14 +330,15 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
         */
 #ifdef CONFIG_DEBUG_PAGEALLOC
        printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
-               begin, PAGE_ALIGN(end));
+               begin, end);
        set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
 #else
        /*
         * We just marked the kernel text read only above, now that
         * we are going to free part of that, we need to make that
-        * writeable first.
+        * writeable and non-executable first.
         */
+       set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
        set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
 
        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
@@ -447,8 +346,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
        for (; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
-               memset((void *)(addr & ~(PAGE_SIZE-1)),
-                       POISON_FREE_INITMEM, PAGE_SIZE);
+               memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
                free_page(addr);
                totalram_pages++;
        }
@@ -465,6 +363,15 @@ void free_initmem(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-       free_init_pages("initrd memory", start, end);
+       /*
+        * end could be unaligned, and we cannot align it; the
+        * decompressor could be confused by an aligned initrd_end.
+        * We already reserve the trailing partial page beforehand in:
+        *   - i386_start_kernel()
+        *   - x86_64_start_kernel()
+        *   - relocate_initrd()
+        * so here we can safely PAGE_ALIGN() to free the partial page.
+        */
+       free_init_pages("initrd memory", start, PAGE_ALIGN(end));
 }
 #endif