nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - arch/x86/kernel/setup.c
x86: Make sure free_init_pages() frees pages on page boundary
[linux-2.6.git] / arch / x86 / kernel / setup.c
index 5796eb158d49bccee884bf738756cd32a939a700..d76e18570c60ec5b8c7e28685a3fe69751d6c575 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/screen_info.h>
 #include <linux/ioport.h>
 #include <linux/acpi.h>
+#include <linux/sfi.h>
 #include <linux/apm_bios.h>
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
 
 #include <linux/percpu.h>
 #include <linux/crash_dump.h>
+#include <linux/tboot.h>
 
 #include <video/edid.h>
 
 #include <asm/mtrr.h>
 #include <asm/apic.h>
+#include <asm/trampoline.h>
 #include <asm/e820.h>
 #include <asm/mpspec.h>
 #include <asm/setup.h>
 #include <asm/percpu.h>
 #include <asm/topology.h>
 #include <asm/apicdef.h>
+#include <asm/k8.h>
 #ifdef CONFIG_X86_64
 #include <asm/numa_64.h>
 #endif
-
-#ifndef ARCH_SETUP
-#define ARCH_SETUP
-#endif
+#include <asm/mce.h>
 
 /*
  * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
 unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
+#ifdef CONFIG_DMI
 RESERVE_BRK(dmi_alloc, 65536);
+#endif
 
 unsigned int boot_cpu_id __read_mostly;
 
@@ -133,9 +136,9 @@ int default_cpu_present_to_apicid(int mps_cpu)
        return __default_cpu_present_to_apicid(mps_cpu);
 }
 
-int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+int default_check_phys_apicid_present(int phys_apicid)
 {
-       return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
+       return __default_check_phys_apicid_present(phys_apicid);
 }
 #endif
 
@@ -171,13 +174,6 @@ static struct resource bss_resource = {
 
 
 #ifdef CONFIG_X86_32
-static struct resource video_ram_resource = {
-       .name   = "Video RAM area",
-       .start  = 0xa0000,
-       .end    = 0xbffff,
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
 /* cpu data as detected by the assembly code in head.S */
 struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
 /* common cpu data for all cpus */
@@ -256,7 +252,7 @@ EXPORT_SYMBOL(edd);
  *              from boot_params into a safe place.
  *
  */
-static inline void copy_edd(void)
+static inline void __init copy_edd(void)
 {
      memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
            sizeof(edd.mbr_signature));
@@ -265,7 +261,7 @@ static inline void copy_edd(void)
      edd.edd_info_nr = boot_params.eddbuf_entries;
 }
 #else
-static inline void copy_edd(void)
+static inline void __init copy_edd(void)
 {
 }
 #endif
@@ -318,16 +314,17 @@ static void __init reserve_brk(void)
 #define MAX_MAP_CHUNK  (NR_FIX_BTMAPS << PAGE_SHIFT)
 static void __init relocate_initrd(void)
 {
-
+       /* Assume only end is not page aligned */
        u64 ramdisk_image = boot_params.hdr.ramdisk_image;
        u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
+       u64 area_size     = PAGE_ALIGN(ramdisk_size);
        u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
        u64 ramdisk_here;
        unsigned long slop, clen, mapaddr;
        char *p, *q;
 
        /* We need to move the initrd down into lowmem */
-       ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
+       ramdisk_here = find_e820_area(0, end_of_lowmem, area_size,
                                         PAGE_SIZE);
 
        if (ramdisk_here == -1ULL)
@@ -336,7 +333,7 @@ static void __init relocate_initrd(void)
 
        /* Note: this includes all the lowmem currently occupied by
           the initrd, we rely on that fact to keep the data intact. */
-       reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
+       reserve_early(ramdisk_here, ramdisk_here + area_size,
                         "NEW RAMDISK");
        initrd_start = ramdisk_here + PAGE_OFFSET;
        initrd_end   = initrd_start + ramdisk_size;
@@ -380,9 +377,10 @@ static void __init relocate_initrd(void)
 
 static void __init reserve_initrd(void)
 {
+       /* Assume only end is not page aligned */
        u64 ramdisk_image = boot_params.hdr.ramdisk_image;
        u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
-       u64 ramdisk_end   = ramdisk_image + ramdisk_size;
+       u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
        u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 
        if (!boot_params.hdr.type_of_loader ||
@@ -495,42 +493,11 @@ static void __init reserve_early_setup_data(void)
 
 #ifdef CONFIG_KEXEC
 
-/**
- * Reserve @size bytes of crashkernel memory at any suitable offset.
- *
- * @size: Size of the crashkernel memory to reserve.
- * Returns the base address on success, and -1ULL on failure.
- */
-static
-unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
-{
-       const unsigned long long alignment = 16<<20;    /* 16M */
-       unsigned long long start = 0LL;
-
-       while (1) {
-               int ret;
-
-               start = find_e820_area(start, ULONG_MAX, size, alignment);
-               if (start == -1ULL)
-                       return start;
-
-               /* try to reserve it */
-               ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
-               if (ret >= 0)
-                       return start;
-
-               start += alignment;
-       }
-}
-
 static inline unsigned long long get_total_mem(void)
 {
        unsigned long long total;
 
-       total = max_low_pfn - min_low_pfn;
-#ifdef CONFIG_HIGHMEM
-       total += highend_pfn - highstart_pfn;
-#endif
+       total = max_pfn - min_low_pfn;
 
        return total << PAGE_SHIFT;
 }
@@ -550,21 +517,25 @@ static void __init reserve_crashkernel(void)
 
        /* 0 means: find the address automatically */
        if (crash_base <= 0) {
-               crash_base = find_and_reserve_crashkernel(crash_size);
+               const unsigned long long alignment = 16<<20;    /* 16M */
+
+               crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
+                                alignment);
                if (crash_base == -1ULL) {
-                       pr_info("crashkernel reservation failed. "
-                               "No suitable area found.\n");
+                       pr_info("crashkernel reservation failed - No suitable area found.\n");
                        return;
                }
        } else {
-               ret = reserve_bootmem_generic(crash_base, crash_size,
-                                       BOOTMEM_EXCLUSIVE);
-               if (ret < 0) {
-                       pr_info("crashkernel reservation failed - "
-                               "memory is in use\n");
+               unsigned long long start;
+
+               start = find_e820_area(crash_base, ULONG_MAX, crash_size,
+                                1<<20);
+               if (start != crash_base) {
+                       pr_info("crashkernel reservation failed - memory is in use.\n");
                        return;
                }
        }
+       reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
 
        printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
                        "for crashkernel (System RAM: %ldMB)\n",
@@ -605,7 +576,7 @@ static struct resource standard_io_resources[] = {
                .flags = IORESOURCE_BUSY | IORESOURCE_IO }
 };
 
-static void __init reserve_standard_io_resources(void)
+void __init reserve_standard_io_resources(void)
 {
        int i;
 
@@ -637,10 +608,6 @@ static int __init setup_elfcorehdr(char *arg)
 early_param("elfcorehdr", setup_elfcorehdr);
 #endif
 
-static struct x86_quirks default_x86_quirks __initdata;
-
-struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
-
 #ifdef CONFIG_X86_RESERVE_LOW_64K
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
@@ -673,22 +640,54 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
                },
        },
        {
+               .callback = dmi_low_memory_corruption,
+               .ident = "Phoenix/MSC BIOS",
+               .matches = {
+                       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
+               },
+       },
        /*
-        * AMI BIOS with low memory corruption was found on Intel DG45ID board.
-        * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
+        * AMI BIOS with low memory corruption was found on Intel DG45ID and
+        * DG45FC boards.
+        * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
         * match only DMI_BOARD_NAME and see if there is more bad products
         * with this vendor.
         */
+       {
                .callback = dmi_low_memory_corruption,
                .ident = "AMI BIOS",
                .matches = {
                        DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
                },
        },
+       {
+               .callback = dmi_low_memory_corruption,
+               .ident = "AMI BIOS",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
+               },
+       },
 #endif
        {}
 };
 
+static void __init trim_bios_range(void)
+{
+       /*
+        * A special case is the first 4KB of memory:
+        * this is a BIOS-owned area, not kernel RAM, but it is generally
+        * not listed as such in the E820 table, so mark it reserved.
+        */
+       e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
+       /*
+        * Special case: some BIOSes report the PC BIOS area
+        * (640KB->1MB) as RAM even though it is not. Remove that
+        * range from the map, then re-sanitize the E820 table.
+        */
+       e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+       sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -704,6 +703,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
 
 void __init setup_arch(char **cmdline_p)
 {
+       int acpi = 0;
+       int k8 = 0;
+
 #ifdef CONFIG_X86_32
        memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
        visws_early_detect();
@@ -757,7 +759,7 @@ void __init setup_arch(char **cmdline_p)
        }
 #endif
 
-       ARCH_SETUP
+       x86_init.oem.arch_setup();
 
        setup_memory_map();
        parse_setup_data();
@@ -796,11 +798,18 @@ void __init setup_arch(char **cmdline_p)
        strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
 
+       /*
+        * x86_configure_nx() is called before parse_early_param() to detect
+        * whether hardware doesn't support NX (so that the early EHCI debug
+        * console setup can safely call set_fixmap()). It may then be called
+        * again from within noexec_setup() during parsing early parameters
+        * to honor the respective command line option.
+        */
+       x86_configure_nx();
+
        parse_early_param();
 
-#ifdef CONFIG_X86_64
-       check_efer();
-#endif
+       x86_report_nx();
 
        /* Must be before kernel pagetables are setup */
        vmi_activate();
@@ -833,7 +842,7 @@ void __init setup_arch(char **cmdline_p)
         * VMware detection requires dmi to be available, so this
         * needs to be done after dmi_scan_machine, for the BP.
         */
-       init_hypervisor(&boot_cpu_data);
+       init_hypervisor_platform();
 
        x86_init.resources.probe_roms();
 
@@ -842,7 +851,7 @@ void __init setup_arch(char **cmdline_p)
        insert_resource(&iomem_resource, &data_resource);
        insert_resource(&iomem_resource, &bss_resource);
 
-
+       trim_bios_range();
 #ifdef CONFIG_X86_32
        if (ppro_with_ram_bug()) {
                e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
@@ -896,6 +905,20 @@ void __init setup_arch(char **cmdline_p)
 
        reserve_brk();
 
+       /*
+        * Find and reserve possible boot-time SMP configuration:
+        */
+       find_smp_config();
+
+       reserve_trampoline_memory();
+
+#ifdef CONFIG_ACPI_SLEEP
+       /*
+        * Reserve low memory region for sleep support.
+        * even before init_memory_mapping
+        */
+       acpi_reserve_wakeup_memory();
+#endif
        init_gbpages();
 
        /* max_pfn_mapped is updated here */
@@ -922,6 +945,8 @@ void __init setup_arch(char **cmdline_p)
 
        reserve_initrd();
 
+       reserve_crashkernel();
+
        vsmp_init();
 
        io_delay_init();
@@ -937,32 +962,20 @@ void __init setup_arch(char **cmdline_p)
        /*
         * Parse SRAT to discover nodes.
         */
-       acpi_numa_init();
+       acpi = acpi_numa_init();
 #endif
 
-       initmem_init(0, max_pfn);
-
-#ifdef CONFIG_ACPI_SLEEP
-       /*
-        * Reserve low memory region for sleep support.
-        */
-       acpi_reserve_bootmem();
+#ifdef CONFIG_K8_NUMA
+       if (!acpi)
+               k8 = !k8_numa_init(0, max_pfn);
 #endif
-       /*
-        * Find and reserve possible boot-time SMP configuration:
-        */
-       find_smp_config();
 
-       reserve_crashkernel();
+       initmem_init(0, max_pfn, acpi, k8);
+#ifndef CONFIG_NO_BOOTMEM
+       early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
+#endif
 
-#ifdef CONFIG_X86_64
-       /*
-        * dma32_reserve_bootmem() allocates bootmem which may conflict
-        * with the crashkernel command line, so do that after
-        * reserve_crashkernel()
-        */
        dma32_reserve_bootmem();
-#endif
 
        reserve_ibft_region();
 
@@ -970,10 +983,11 @@ void __init setup_arch(char **cmdline_p)
        kvmclock_init();
 #endif
 
-       paravirt_pagetable_setup_start(swapper_pg_dir);
+       x86_init.paging.pagetable_setup_start(swapper_pg_dir);
        paging_init();
-       paravirt_pagetable_setup_done(swapper_pg_dir);
-       paravirt_post_allocator_init();
+       x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+
+       tboot_probe();
 
 #ifdef CONFIG_X86_64
        map_vsyscall();
@@ -988,13 +1002,13 @@ void __init setup_arch(char **cmdline_p)
         */
        acpi_boot_init();
 
-#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
+       sfi_init();
+
        /*
         * get boot-time SMP configuration:
         */
        if (smp_found_config)
                get_smp_config();
-#endif
 
        prefill_possible_map();
 
@@ -1013,10 +1027,7 @@ void __init setup_arch(char **cmdline_p)
        e820_reserve_resources();
        e820_mark_nosave_regions(max_low_pfn);
 
-#ifdef CONFIG_X86_32
-       request_resource(&iomem_resource, &video_ram_resource);
-#endif
-       reserve_standard_io_resources();
+       x86_init.resources.reserve_resources();
 
        e820_setup_gap();
 
@@ -1028,78 +1039,24 @@ void __init setup_arch(char **cmdline_p)
        conswitchp = &dummy_con;
 #endif
 #endif
-}
-
-#ifdef CONFIG_X86_32
+       x86_init.oem.banner();
 
-/**
- * x86_quirk_intr_init - post gate setup interrupt initialisation
- *
- * Description:
- *     Fill in any interrupts that may have been left out by the general
- *     init_IRQ() routine.  interrupts having to do with the machine rather
- *     than the devices on the I/O bus (like APIC interrupts in intel MP
- *     systems) are started here.
- **/
-void __init x86_quirk_intr_init(void)
-{
-       if (x86_quirks->arch_intr_init) {
-               if (x86_quirks->arch_intr_init())
-                       return;
-       }
+       mcheck_init();
 }
 
-/**
- * x86_quirk_trap_init - initialise system specific traps
- *
- * Description:
- *     Called as the final act of trap_init().  Used in VISWS to initialise
- *     the various board specific APIC traps.
- **/
-void __init x86_quirk_trap_init(void)
-{
-       if (x86_quirks->arch_trap_init) {
-               if (x86_quirks->arch_trap_init())
-                       return;
-       }
-}
+#ifdef CONFIG_X86_32
 
-static struct irqaction irq0  = {
-       .handler = timer_interrupt,
-       .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
-       .name = "timer"
+static struct resource video_ram_resource = {
+       .name   = "Video RAM area",
+       .start  = 0xa0000,
+       .end    = 0xbffff,
+       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
 };
 
-/**
- * x86_quirk_pre_time_init - do any specific initialisations before.
- *
- **/
-void __init x86_quirk_pre_time_init(void)
+void __init i386_reserve_resources(void)
 {
-       if (x86_quirks->arch_pre_time_init)
-               x86_quirks->arch_pre_time_init();
+       request_resource(&iomem_resource, &video_ram_resource);
+       reserve_standard_io_resources();
 }
 
-/**
- * x86_quirk_time_init - do any specific initialisations for the system timer.
- *
- * Description:
- *     Must plug the system timer interrupt source at HZ into the IRQ listed
- *     in irq_vectors.h:TIMER_IRQ
- **/
-void __init x86_quirk_time_init(void)
-{
-       if (x86_quirks->arch_time_init) {
-               /*
-                * A nonzero return code does not mean failure, it means
-                * that the architecture quirk does not want any
-                * generic (timer) setup to be performed after this:
-                */
-               if (x86_quirks->arch_time_init())
-                       return;
-       }
-
-       irq0.mask = cpumask_of_cpu(0);
-       setup_irq(0, &irq0);
-}
 #endif /* CONFIG_X86_32 */