2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/sched.h>
28 #include <linux/mmzone.h>
29 #include <linux/tty.h>
30 #include <linux/ioport.h>
31 #include <linux/acpi.h>
32 #include <linux/apm_bios.h>
33 #include <linux/initrd.h>
34 #include <linux/bootmem.h>
35 #include <linux/seq_file.h>
36 #include <linux/console.h>
37 #include <linux/mca.h>
38 #include <linux/root_dev.h>
39 #include <linux/highmem.h>
40 #include <linux/module.h>
41 #include <linux/efi.h>
42 #include <linux/init.h>
43 #include <linux/edd.h>
44 #include <linux/nodemask.h>
45 #include <video/edid.h>
47 #include <asm/mpspec.h>
48 #include <asm/setup.h>
49 #include <asm/arch_hooks.h>
50 #include <asm/sections.h>
51 #include <asm/io_apic.h>
54 #include "setup_arch_pre.h"
55 #include <bios_ebda.h>
57 /* This value is set up by the early boot code to point to the value
58 immediately after the boot time page tables. It contains a *physical*
59 address, and must not be in the .bss segment! */
60 unsigned long init_pg_tables_end __initdata = ~0UL;
62 int disable_pse __initdata = 0;
70 EXPORT_SYMBOL(efi_enabled);
73 /* cpu data as detected by the assembly code in head.S */
74 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
75 /* common cpu data for all cpus */
76 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
78 unsigned long mmu_cr4_features;
80 #ifdef CONFIG_ACPI_INTERPRETER
81 int acpi_disabled = 0;
83 int acpi_disabled = 1;
85 EXPORT_SYMBOL(acpi_disabled);
87 #ifdef CONFIG_ACPI_BOOT
88 int __initdata acpi_force = 0;
89 extern acpi_interrupt_flags acpi_sci_flags;
92 /* for MCA, but anyone else can use it if they want */
93 unsigned int machine_id;
94 unsigned int machine_submodel_id;
95 unsigned int BIOS_revision;
96 unsigned int mca_pentium_flag;
98 /* For PCI or other memory-mapped resources */
99 unsigned long pci_mem_start = 0x10000000;
101 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
104 /* user-defined highmem size */
105 static unsigned int highmem_pages = -1;
110 struct drive_info_struct { char dummy[32]; } drive_info;
111 struct screen_info screen_info;
112 struct apm_info apm_info;
113 struct sys_desc_table_struct {
114 unsigned short length;
115 unsigned char table[0];
117 struct edid_info edid_info;
118 struct ist_info ist_info;
121 extern void early_cpu_init(void);
122 extern void dmi_scan_machine(void);
123 extern void generic_apic_probe(char *);
124 extern int root_mountflags;
126 unsigned long saved_videomode;
128 #define RAMDISK_IMAGE_START_MASK 0x07FF
129 #define RAMDISK_PROMPT_FLAG 0x8000
130 #define RAMDISK_LOAD_FLAG 0x4000
132 static char command_line[COMMAND_LINE_SIZE];
134 unsigned char __initdata boot_params[PARAM_SIZE];
136 static struct resource data_resource = {
137 .name = "Kernel data",
140 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
143 static struct resource code_resource = {
144 .name = "Kernel code",
147 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
150 static struct resource system_rom_resource = {
151 .name = "System ROM",
154 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
157 static struct resource extension_rom_resource = {
158 .name = "Extension ROM",
161 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
164 static struct resource adapter_rom_resources[] = { {
165 .name = "Adapter ROM",
168 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
170 .name = "Adapter ROM",
173 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
175 .name = "Adapter ROM",
178 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
180 .name = "Adapter ROM",
183 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
185 .name = "Adapter ROM",
188 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
190 .name = "Adapter ROM",
193 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
196 #define ADAPTER_ROM_RESOURCES \
197 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
199 static struct resource video_rom_resource = {
203 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
206 static struct resource video_ram_resource = {
207 .name = "Video RAM area",
210 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
213 static struct resource standard_io_resources[] = { {
217 .flags = IORESOURCE_BUSY | IORESOURCE_IO
222 .flags = IORESOURCE_BUSY | IORESOURCE_IO
227 .flags = IORESOURCE_BUSY | IORESOURCE_IO
232 .flags = IORESOURCE_BUSY | IORESOURCE_IO
237 .flags = IORESOURCE_BUSY | IORESOURCE_IO
239 .name = "dma page reg",
242 .flags = IORESOURCE_BUSY | IORESOURCE_IO
247 .flags = IORESOURCE_BUSY | IORESOURCE_IO
252 .flags = IORESOURCE_BUSY | IORESOURCE_IO
257 .flags = IORESOURCE_BUSY | IORESOURCE_IO
260 #define STANDARD_IO_RESOURCES \
261 (sizeof standard_io_resources / sizeof standard_io_resources[0])
263 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
265 static int __init romchecksum(unsigned char *rom, unsigned long length)
267 unsigned char *p, sum = 0;
269 for (p = rom; p < rom + length; p++)
274 static void __init probe_roms(void)
276 unsigned long start, length, upper;
281 upper = adapter_rom_resources[0].start;
282 for (start = video_rom_resource.start; start < upper; start += 2048) {
283 rom = isa_bus_to_virt(start);
284 if (!romsignature(rom))
287 video_rom_resource.start = start;
289 /* 0 < length <= 0x7f * 512, historically */
290 length = rom[2] * 512;
292 /* if checksum okay, trust length byte */
293 if (length && romchecksum(rom, length))
294 video_rom_resource.end = start + length - 1;
296 request_resource(&iomem_resource, &video_rom_resource);
300 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
305 request_resource(&iomem_resource, &system_rom_resource);
306 upper = system_rom_resource.start;
308 /* check for extension rom (ignore length byte!) */
309 rom = isa_bus_to_virt(extension_rom_resource.start);
310 if (romsignature(rom)) {
311 length = extension_rom_resource.end - extension_rom_resource.start + 1;
312 if (romchecksum(rom, length)) {
313 request_resource(&iomem_resource, &extension_rom_resource);
314 upper = extension_rom_resource.start;
318 /* check for adapter roms on 2k boundaries */
319 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
320 rom = isa_bus_to_virt(start);
321 if (!romsignature(rom))
324 /* 0 < length <= 0x7f * 512, historically */
325 length = rom[2] * 512;
327 /* but accept any length that fits if checksum okay */
328 if (!length || start + length > upper || !romchecksum(rom, length))
331 adapter_rom_resources[i].start = start;
332 adapter_rom_resources[i].end = start + length - 1;
333 request_resource(&iomem_resource, &adapter_rom_resources[i]);
335 start = adapter_rom_resources[i++].end & ~2047UL;
339 static void __init limit_regions(unsigned long long size)
341 unsigned long long current_addr = 0;
345 for (i = 0; i < memmap.nr_map; i++) {
346 current_addr = memmap.map[i].phys_addr +
347 (memmap.map[i].num_pages << 12);
348 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
349 if (current_addr >= size) {
350 memmap.map[i].num_pages -=
351 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
352 memmap.nr_map = i + 1;
358 for (i = 0; i < e820.nr_map; i++) {
359 if (e820.map[i].type == E820_RAM) {
360 current_addr = e820.map[i].addr + e820.map[i].size;
361 if (current_addr >= size) {
362 e820.map[i].size -= current_addr-size;
370 static void __init add_memory_region(unsigned long long start,
371 unsigned long long size, int type)
379 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
383 e820.map[x].addr = start;
384 e820.map[x].size = size;
385 e820.map[x].type = type;
388 } /* add_memory_region */
392 static void __init print_memory_map(char *who)
396 for (i = 0; i < e820.nr_map; i++) {
397 printk(" %s: %016Lx - %016Lx ", who,
399 e820.map[i].addr + e820.map[i].size);
400 switch (e820.map[i].type) {
401 case E820_RAM: printk("(usable)\n");
404 printk("(reserved)\n");
407 printk("(ACPI data)\n");
410 printk("(ACPI NVS)\n");
412 default: printk("type %lu\n", e820.map[i].type);
419 * Sanitize the BIOS e820 map.
421 * Some e820 responses include overlapping entries. The following
422 * replaces the original e820 map with a new one, removing overlaps.
425 struct change_member {
426 struct e820entry *pbios; /* pointer to original bios entry */
427 unsigned long long addr; /* address for this change point */
429 static struct change_member change_point_list[2*E820MAX] __initdata;
430 static struct change_member *change_point[2*E820MAX] __initdata;
431 static struct e820entry *overlap_list[E820MAX] __initdata;
432 static struct e820entry new_bios[E820MAX] __initdata;
434 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
436 struct change_member *change_tmp;
437 unsigned long current_type, last_type;
438 unsigned long long last_addr;
439 int chgidx, still_changing;
442 int old_nr, new_nr, chg_nr;
446 Visually we're performing the following (1,2,3,4 = memory types)...
448 Sample memory map (w/overlaps):
449 ____22__________________
450 ______________________4_
451 ____1111________________
452 _44_____________________
453 11111111________________
454 ____________________33__
455 ___________44___________
456 __________33333_________
457 ______________22________
458 ___________________2222_
459 _________111111111______
460 _____________________11_
461 _________________4______
463 Sanitized equivalent (no overlap):
464 1_______________________
465 _44_____________________
466 ___1____________________
467 ____22__________________
468 ______11________________
469 _________1______________
470 __________3_____________
471 ___________44___________
472 _____________33_________
473 _______________2________
474 ________________1_______
475 _________________4______
476 ___________________2____
477 ____________________33__
478 ______________________4_
481 /* if there's only one memory region, don't bother */
487 /* bail out if we find any unreasonable addresses in bios map */
488 for (i=0; i<old_nr; i++)
489 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
492 /* create pointers for initial change-point information (for sorting) */
493 for (i=0; i < 2*old_nr; i++)
494 change_point[i] = &change_point_list[i];
496 /* record all known change-points (starting and ending addresses),
497 omitting those that are for empty memory regions */
499 for (i=0; i < old_nr; i++) {
500 if (biosmap[i].size != 0) {
501 change_point[chgidx]->addr = biosmap[i].addr;
502 change_point[chgidx++]->pbios = &biosmap[i];
503 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
504 change_point[chgidx++]->pbios = &biosmap[i];
507 chg_nr = chgidx; /* true number of change-points */
509 /* sort change-point list by memory addresses (low -> high) */
511 while (still_changing) {
513 for (i=1; i < chg_nr; i++) {
514 /* if <current_addr> < <last_addr>, swap */
515 /* or, if current=<start_addr> & last=<end_addr>, swap */
516 if ((change_point[i]->addr < change_point[i-1]->addr) ||
517 ((change_point[i]->addr == change_point[i-1]->addr) &&
518 (change_point[i]->addr == change_point[i]->pbios->addr) &&
519 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
522 change_tmp = change_point[i];
523 change_point[i] = change_point[i-1];
524 change_point[i-1] = change_tmp;
530 /* create a new bios memory map, removing overlaps */
531 overlap_entries=0; /* number of entries in the overlap table */
532 new_bios_entry=0; /* index for creating new bios map entries */
533 last_type = 0; /* start with undefined memory type */
534 last_addr = 0; /* start with 0 as last starting address */
535 /* loop through change-points, determining effect on the new bios map */
536 for (chgidx=0; chgidx < chg_nr; chgidx++)
538 /* keep track of all overlapping bios entries */
539 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
541 /* add map entry to overlap list (> 1 entry implies an overlap) */
542 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
546 /* remove entry from list (order independent, so swap with last) */
547 for (i=0; i<overlap_entries; i++)
549 if (overlap_list[i] == change_point[chgidx]->pbios)
550 overlap_list[i] = overlap_list[overlap_entries-1];
554 /* if there are overlapping entries, decide which "type" to use */
555 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
557 for (i=0; i<overlap_entries; i++)
558 if (overlap_list[i]->type > current_type)
559 current_type = overlap_list[i]->type;
560 /* continue building up new bios map based on this information */
561 if (current_type != last_type) {
562 if (last_type != 0) {
563 new_bios[new_bios_entry].size =
564 change_point[chgidx]->addr - last_addr;
565 /* move forward only if the new size was non-zero */
566 if (new_bios[new_bios_entry].size != 0)
567 if (++new_bios_entry >= E820MAX)
568 break; /* no more space left for new bios entries */
570 if (current_type != 0) {
571 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
572 new_bios[new_bios_entry].type = current_type;
573 last_addr=change_point[chgidx]->addr;
575 last_type = current_type;
578 new_nr = new_bios_entry; /* retain count for new bios entries */
580 /* copy new bios mapping into original location */
581 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
588 * Copy the BIOS e820 map into a safe place.
590 * Sanity-check it while we're at it..
592 * If we're lucky and live on a modern system, the setup code
593 * will have given us a memory map that we can use to properly
594 * set up memory. If we aren't, we'll fake a memory map.
596 * We check to see that the memory map contains at least 2 elements
597 * before we'll use it, because the detection code in setup.S may
598 * not be perfect and most every PC known to man has two memory
599 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
600 * thinkpad 560x, for example, does not cooperate with the memory
603 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
605 /* Only one memory region (or negative)? Ignore it */
610 unsigned long long start = biosmap->addr;
611 unsigned long long size = biosmap->size;
612 unsigned long long end = start + size;
613 unsigned long type = biosmap->type;
615 /* Overflow in 64 bits? Ignore the memory map. */
620 * Some BIOSes claim RAM in the 640k - 1M region.
621 * Not right. Fix it up.
623 if (type == E820_RAM) {
624 if (start < 0x100000ULL && end > 0xA0000ULL) {
625 if (start < 0xA0000ULL)
626 add_memory_region(start, 0xA0000ULL-start, type);
627 if (end <= 0x100000ULL)
633 add_memory_region(start, size, type);
634 } while (biosmap++,--nr_map);
638 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
640 #ifdef CONFIG_EDD_MODULE
644 * copy_edd() - Copy the BIOS EDD information
645 * from boot_params into a safe place.
648 static inline void copy_edd(void)
650 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
651 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
652 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
653 edd.edd_info_nr = EDD_NR;
656 static inline void copy_edd(void)
662 * Do NOT EVER look at the BIOS memory size location.
663 * It does not work on many machines.
665 #define LOWMEMSIZE() (0x9f000)
667 static void __init parse_cmdline_early (char ** cmdline_p)
669 char c = ' ', *to = command_line, *from = saved_command_line;
673 /* Save unparsed command line copy for /proc/cmdline */
674 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
680 * "mem=nopentium" disables the 4MB page tables.
681 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
682 * to <mem>, overriding the bios size.
683 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
684 * <start> to <start>+<mem>, overriding the bios size.
686 * HPA tells me bootloaders need to parse mem=, so no new
687 * option should be mem= [also see Documentation/i386/boot.txt]
689 if (!memcmp(from, "mem=", 4)) {
690 if (to != command_line)
692 if (!memcmp(from+4, "nopentium", 9)) {
694 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
697 /* If the user specifies memory size, we
698 * limit the BIOS-provided memory map to
699 * that size. exactmap can be used to specify
700 * the exact map. mem=number can be used to
701 * trim the existing memory map.
703 unsigned long long mem_size;
705 mem_size = memparse(from+4, &from);
706 limit_regions(mem_size);
711 else if (!memcmp(from, "memmap=", 7)) {
712 if (to != command_line)
714 if (!memcmp(from+7, "exactmap", 8)) {
719 /* If the user specifies memory size, we
720 * limit the BIOS-provided memory map to
721 * that size. exactmap can be used to specify
722 * the exact map. mem=number can be used to
723 * trim the existing memory map.
725 unsigned long long start_at, mem_size;
727 mem_size = memparse(from+7, &from);
729 start_at = memparse(from+1, &from);
730 add_memory_region(start_at, mem_size, E820_RAM);
731 } else if (*from == '#') {
732 start_at = memparse(from+1, &from);
733 add_memory_region(start_at, mem_size, E820_ACPI);
734 } else if (*from == '$') {
735 start_at = memparse(from+1, &from);
736 add_memory_region(start_at, mem_size, E820_RESERVED);
738 limit_regions(mem_size);
744 else if (!memcmp(from, "noexec=", 7))
745 noexec_setup(from + 7);
748 #ifdef CONFIG_X86_SMP
750 * If the BIOS enumerates physical processors before logical,
751 * maxcpus=N at enumeration-time can be used to disable HT.
753 else if (!memcmp(from, "maxcpus=", 8)) {
754 extern unsigned int maxcpus;
756 maxcpus = simple_strtoul(from + 8, NULL, 0);
760 #ifdef CONFIG_ACPI_BOOT
761 /* "acpi=off" disables both ACPI table parsing and interpreter */
762 else if (!memcmp(from, "acpi=off", 8)) {
766 /* acpi=force to over-ride black-list */
767 else if (!memcmp(from, "acpi=force", 10)) {
773 /* acpi=strict disables out-of-spec workarounds */
774 else if (!memcmp(from, "acpi=strict", 11)) {
778 /* Limit ACPI just to boot-time to enable HT */
779 else if (!memcmp(from, "acpi=ht", 7)) {
785 /* "pci=noacpi" disables ACPI IRQ routing and PCI scan */
786 else if (!memcmp(from, "pci=noacpi", 10)) {
789 /* "acpi=noirq" disables ACPI interrupt routing */
790 else if (!memcmp(from, "acpi=noirq", 10)) {
794 else if (!memcmp(from, "acpi_sci=edge", 13))
795 acpi_sci_flags.trigger = 1;
797 else if (!memcmp(from, "acpi_sci=level", 14))
798 acpi_sci_flags.trigger = 3;
800 else if (!memcmp(from, "acpi_sci=high", 13))
801 acpi_sci_flags.polarity = 1;
803 else if (!memcmp(from, "acpi_sci=low", 12))
804 acpi_sci_flags.polarity = 3;
806 #ifdef CONFIG_X86_IO_APIC
807 else if (!memcmp(from, "acpi_skip_timer_override", 24))
808 acpi_skip_timer_override = 1;
811 #ifdef CONFIG_X86_LOCAL_APIC
812 /* disable IO-APIC */
813 else if (!memcmp(from, "noapic", 6))
814 disable_ioapic_setup();
815 #endif /* CONFIG_X86_LOCAL_APIC */
816 #endif /* CONFIG_ACPI_BOOT */
819 * highmem=size forces highmem to be exactly 'size' bytes.
820 * This works even on boxes that have no highmem otherwise.
821 * This also works to reduce highmem size on bigger boxes.
823 else if (!memcmp(from, "highmem=", 8))
824 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
827 * vmalloc=size forces the vmalloc area to be exactly 'size'
828 * bytes. This can be used to increase (or decrease) the
829 * vmalloc area - the default is 128m.
831 else if (!memcmp(from, "vmalloc=", 8))
832 __VMALLOC_RESERVE = memparse(from+8, &from);
838 if (COMMAND_LINE_SIZE <= ++len)
843 *cmdline_p = command_line;
845 printk(KERN_INFO "user-defined physical RAM map:\n");
846 print_memory_map("user");
851 * Callback for efi_memmap_walk.
854 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
856 unsigned long *max_pfn = arg, pfn;
859 pfn = PFN_UP(end -1);
868 * Find the highest page frame number we have available
870 void __init find_max_pfn(void)
876 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
880 for (i = 0; i < e820.nr_map; i++) {
881 unsigned long start, end;
883 if (e820.map[i].type != E820_RAM)
885 start = PFN_UP(e820.map[i].addr);
886 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
895 * Determine low and high memory ranges:
897 unsigned long __init find_max_low_pfn(void)
899 unsigned long max_low_pfn;
901 max_low_pfn = max_pfn;
902 if (max_low_pfn > MAXMEM_PFN) {
903 if (highmem_pages == -1)
904 highmem_pages = max_pfn - MAXMEM_PFN;
905 if (highmem_pages + MAXMEM_PFN < max_pfn)
906 max_pfn = MAXMEM_PFN + highmem_pages;
907 if (highmem_pages + MAXMEM_PFN > max_pfn) {
908 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
911 max_low_pfn = MAXMEM_PFN;
912 #ifndef CONFIG_HIGHMEM
913 /* Maximum memory usable is what is directly addressable */
914 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
916 if (max_pfn > MAX_NONPAE_PFN)
917 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
919 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
920 max_pfn = MAXMEM_PFN;
921 #else /* !CONFIG_HIGHMEM */
922 #ifndef CONFIG_X86_PAE
923 if (max_pfn > MAX_NONPAE_PFN) {
924 max_pfn = MAX_NONPAE_PFN;
925 printk(KERN_WARNING "Warning only 4GB will be used.\n");
926 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
928 #endif /* !CONFIG_X86_PAE */
929 #endif /* !CONFIG_HIGHMEM */
931 if (highmem_pages == -1)
933 #ifdef CONFIG_HIGHMEM
934 if (highmem_pages >= max_pfn) {
935 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
939 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
940 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
943 max_low_pfn -= highmem_pages;
947 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
954 * Free all available memory for boot time allocation. Used
955 * as a callback function by efi_memmap_walk()
959 free_available_memory(unsigned long start, unsigned long end, void *arg)
961 /* check max_low_pfn */
962 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
964 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
965 end = (max_low_pfn + 1) << PAGE_SHIFT;
967 free_bootmem(start, end - start);
972 * Register fully available low RAM pages with the bootmem allocator.
974 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
979 efi_memmap_walk(free_available_memory, NULL);
982 for (i = 0; i < e820.nr_map; i++) {
983 unsigned long curr_pfn, last_pfn, size;
985 * Reserve usable low memory
987 if (e820.map[i].type != E820_RAM)
990 * We are rounding up the start address of usable memory:
992 curr_pfn = PFN_UP(e820.map[i].addr);
993 if (curr_pfn >= max_low_pfn)
996 * ... and at the end of the usable range downwards:
998 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1000 if (last_pfn > max_low_pfn)
1001 last_pfn = max_low_pfn;
1004 * .. finally, did all the rounding and playing
1005 * around just make the area go away?
1007 if (last_pfn <= curr_pfn)
1010 size = last_pfn - curr_pfn;
1011 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1016 * workaround for Dell systems that neglect to reserve EBDA
1018 static void __init reserve_ebda_region(void)
1021 addr = get_bios_ebda();
1023 reserve_bootmem(addr, PAGE_SIZE);
1026 #ifndef CONFIG_NEED_MULTIPLE_NODES
1027 void __init setup_bootmem_allocator(void);
1028 static unsigned long __init setup_memory(void)
1031 * partially used pages are not usable - thus
1032 * we are rounding upwards:
1034 min_low_pfn = PFN_UP(init_pg_tables_end);
1038 max_low_pfn = find_max_low_pfn();
1040 #ifdef CONFIG_HIGHMEM
1041 highstart_pfn = highend_pfn = max_pfn;
1042 if (max_pfn > max_low_pfn) {
1043 highstart_pfn = max_low_pfn;
1045 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1046 pages_to_mb(highend_pfn - highstart_pfn));
1048 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1049 pages_to_mb(max_low_pfn));
1051 setup_bootmem_allocator();
1056 void __init zone_sizes_init(void)
1058 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1059 unsigned int max_dma, low;
1061 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1065 zones_size[ZONE_DMA] = low;
1067 zones_size[ZONE_DMA] = max_dma;
1068 zones_size[ZONE_NORMAL] = low - max_dma;
1069 #ifdef CONFIG_HIGHMEM
1070 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1073 free_area_init(zones_size);
1076 extern unsigned long __init setup_memory(void);
1077 extern void zone_sizes_init(void);
1078 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1080 void __init setup_bootmem_allocator(void)
1082 unsigned long bootmap_size;
1084 * Initialize the boot-time allocator (with low memory only):
1086 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1088 register_bootmem_low_pages(max_low_pfn);
1091 * Reserve the bootmem bitmap itself as well. We do this in two
1092 * steps (first step was init_bootmem()) because this catches
1093 * the (very unlikely) case of us accidentally initializing the
1094 * bootmem allocator with an invalid RAM area.
1096 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
1097 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1100 * reserve physical page 0 - it's a special BIOS page on many boxes,
1101 * enabling clean reboots, SMP operation, laptop functions.
1103 reserve_bootmem(0, PAGE_SIZE);
1105 /* reserve EBDA region, it's a 4K region */
1106 reserve_ebda_region();
1108 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1109 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1110 unless you have no PS/2 mouse plugged in. */
1111 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1112 boot_cpu_data.x86 == 6)
1113 reserve_bootmem(0xa0000 - 4096, 4096);
1117 * But first pinch a few for the stack/trampoline stuff
1118 * FIXME: Don't need the extra page at 4K, but need to fix
1119 * trampoline before removing it. (see the GDT stuff)
1121 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1123 #ifdef CONFIG_ACPI_SLEEP
1125 * Reserve low memory region for sleep support.
1127 acpi_reserve_bootmem();
1129 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1131 * Find and reserve possible boot-time SMP configuration:
1136 #ifdef CONFIG_BLK_DEV_INITRD
1137 if (LOADER_TYPE && INITRD_START) {
1138 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1139 reserve_bootmem(INITRD_START, INITRD_SIZE);
1141 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1142 initrd_end = initrd_start+INITRD_SIZE;
1145 printk(KERN_ERR "initrd extends beyond end of memory "
1146 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1147 INITRD_START + INITRD_SIZE,
1148 max_low_pfn << PAGE_SHIFT);
1156 * The node 0 pgdat is initialized before all of these because
1157 * it's needed for bootmem. node>0 pgdats have their virtual
1158 * space allocated before the pagetables are in place to access
1159 * them, so they can't be cleared then.
1161 * This should all compile down to nothing when NUMA is off.
1163 void __init remapped_pgdat_init(void)
1167 for_each_online_node(nid) {
1169 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1174 * Request address space for all standard RAM and ROM resources
1175 * and also for regions reported as reserved by the e820.
1178 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1183 for (i = 0; i < e820.nr_map; i++) {
1184 struct resource *res;
1185 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1187 res = alloc_bootmem_low(sizeof(struct resource));
1188 switch (e820.map[i].type) {
1189 case E820_RAM: res->name = "System RAM"; break;
1190 case E820_ACPI: res->name = "ACPI Tables"; break;
1191 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1192 default: res->name = "reserved";
1194 res->start = e820.map[i].addr;
1195 res->end = res->start + e820.map[i].size - 1;
1196 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1197 request_resource(&iomem_resource, res);
1198 if (e820.map[i].type == E820_RAM) {
1200 * We don't know which RAM region contains kernel data,
1201 * so we try it repeatedly and let the resource manager
1204 request_resource(res, code_resource);
1205 request_resource(res, data_resource);
1211 * Request address space for all standard resources
1213 static void __init register_memory(void)
1215 unsigned long gapstart, gapsize;
1216 unsigned long long last;
1220 efi_initialize_iomem_resources(&code_resource, &data_resource);
1222 legacy_init_iomem_resources(&code_resource, &data_resource);
1224 /* EFI systems may still have VGA */
1225 request_resource(&iomem_resource, &video_ram_resource);
1227 /* request I/O space for devices used on all i[345]86 PCs */
1228 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1229 request_resource(&ioport_resource, &standard_io_resources[i]);
1232 * Search for the biggest gap in the low 32 bits of the e820
1235 last = 0x100000000ull;
1236 gapstart = 0x10000000;
1240 unsigned long long start = e820.map[i].addr;
1241 unsigned long long end = start + e820.map[i].size;
1244 * Since "last" is at most 4GB, we know we'll
1245 * fit in 32 bits if this condition is true
1248 unsigned long gap = last - end;
1250 if (gap > gapsize) {
1260 * Start allocating dynamic PCI memory a bit into the gap,
1261 * aligned up to the nearest megabyte.
1263 * Question: should we try to pad it up a bit (do something
1264 * like " + (gapsize >> 3)" in there too?). We now have the
1267 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1269 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1270 pci_mem_start, gapstart, gapsize);
1273 /* Use inline assembly to define this because the nops are defined
1274 as inline assembly strings in the include files and we cannot
1275 get them easily into strings. */
1276 asm("\t.data\nintelnops: "
1277 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1278 GENERIC_NOP7 GENERIC_NOP8);
1279 asm("\t.data\nk8nops: "
1280 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1282 asm("\t.data\nk7nops: "
1283 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1286 extern unsigned char intelnops[], k8nops[], k7nops[];
1287 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1292 intelnops + 1 + 2 + 3,
1293 intelnops + 1 + 2 + 3 + 4,
1294 intelnops + 1 + 2 + 3 + 4 + 5,
1295 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1296 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1298 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1304 k8nops + 1 + 2 + 3 + 4,
1305 k8nops + 1 + 2 + 3 + 4 + 5,
1306 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1307 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1309 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1315 k7nops + 1 + 2 + 3 + 4,
1316 k7nops + 1 + 2 + 3 + 4 + 5,
1317 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1318 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1322 unsigned char **noptable;
1324 { X86_FEATURE_K8, k8_nops },
1325 { X86_FEATURE_K7, k7_nops },
1329 /* Replace instructions with better alternatives for this CPU type.
1331 This runs before SMP is initialized to avoid SMP problems with
1332 self modifying code. This implies that asymmetric systems where
1333 APs have fewer capabilities than the boot processor are not handled.
1334 In this case boot with "noreplacement". */
/*
 * start, end: bounds of an array of struct alt_instr records; the main loop
 * visits every record a with start <= a < end. For each record, the bytes at
 * a->instr are overwritten in place with a->replacement and the remainder of
 * the original instruction is filled from the selected nop table.
 */
1335 void apply_alternatives(void *start, void *end)
1337 struct alt_instr *a;
/* Start from the generic table; an entry of noptypes[] whose feature bit
   the boot CPU has replaces it. The scan ends at a negative cpuid. */
1339 unsigned char **noptable = intel_nops;
1340 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1341 if (boot_cpu_has(noptypes[i].cpuid)) {
1342 noptable = noptypes[i].noptable;
1346 for (a = start; (void *)a < end; a++) {
/* Records are filtered on whether the boot CPU has feature a->cpuid. */
1347 if (!boot_cpu_has(a->cpuid))
/* The replacement is written over the original, so it must not be longer. */
1349 BUG_ON(a->replacementlen > a->instrlen);
1350 memcpy(a->instr, a->replacement, a->replacementlen);
/* diff = bytes of the original instruction still to be overwritten. */
1351 diff = a->instrlen - a->replacementlen;
1352 /* Pad the rest with nops */
/* i is the write offset inside a->instr. Each pass copies one k-byte
   sequence from noptable[k], then reduces diff and advances i by k.
   NOTE(review): k is compared against ASM_NOP_MAX below; presumably it is
   set from diff and clamped to ASM_NOP_MAX -- confirm. */
1353 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1355 if (k > ASM_NOP_MAX)
1357 memcpy(a->instr + i, noptable[k], k);
/* Init-data flag, initially 0. It is not read in the lines shown here.
   NOTE(review): presumably set by the noreplacement boot option and checked
   by alternative_instructions() before patching -- confirm. */
1362 static int no_replacement __initdata = 0;
/* Boot-time entry point: runs apply_alternatives() over the table bounded
   by the __alt_instructions and __alt_instructions_end symbols. */
1364 void __init alternative_instructions(void)
1366 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1369 apply_alternatives(__alt_instructions, __alt_instructions_end);
/* Handler for the noreplacement boot parameter; s is the handler argument
   supplied by the __setup mechanism. NOTE(review): presumably it sets
   no_replacement -- confirm. */
1372 static int __init noreplacement_setup(char *s)
/* Bind the parameter name to the handler above. */
1378 __setup("noreplacement", noreplacement_setup);
/* Forward declaration. setup_arch() passes the returned string straight to
   print_memory_map(). */
1380 static char * __init machine_specific_memory_setup(void);
/* Two alternative definitions of set_mca_bus(). setup_arch() calls it with
   bit 1 (mask 0x2) of SYS_DESC_TABLE.table[3]. The second form is an empty
   stub that ignores x. NOTE(review): the preprocessor conditional choosing
   between the two is presumably CONFIG_MCA -- confirm. */
1383 static void set_mca_bus(int x)
1388 static void set_mca_bus(int x) { }
1392 * Determine if we were loaded by an EFI loader. If so, then we have also been
1393 * passed the efi memmap, systab, etc., so we should use these data structures
1394 * for initialization. Note, the efi init code path is determined by the
1395 * global efi_enabled. This allows the same kernel image to be used on existing
1396 * systems (with a traditional BIOS) as well as on EFI systems.
/*
 * setup_arch() - architecture-specific boot-time initialisation.
 *
 * cmdline_p: handed to parse_cmdline_early(); *cmdline_p is later searched
 *            for earlyprintk= and passed to generic_apic_probe().
 *
 * Visible order of work: copy new_cpu_data into boot_cpu_data, run the
 * pre-setup hook, copy boot parameters (root device, drive, screen, EDID,
 * APM, IST, video mode, system descriptor table, loader type, ramdisk
 * flags) into kernel globals, print the BIOS-provided RAM map, record the
 * kernel image bounds in init_mm and in the code/data resources, parse the
 * command line, call setup_memory(), then perform the optional SMP, early
 * printk, APIC, ACPI and console steps guarded by their config options.
 */
1398 void __init setup_arch(char **cmdline_p)
1400 unsigned long max_low_pfn;
/* Copy the new_cpu_data record into the common boot_cpu_data before anything
   else runs. */
1402 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1403 pre_setup_arch_hook();
1407 * FIXME: This isn't an official loader_type right
1408 * now but does currently work with elilo.
1409 * If we were configured as an EFI kernel, check to make
1410 * sure that we were loaded correctly from elilo and that
1411 * the system table is valid. If not, then initialize normally.
1414 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1418 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1419 drive_info = DRIVE_INFO;
1420 screen_info = SCREEN_INFO;
1421 edid_info = EDID_INFO;
1422 apm_info.bios = APM_BIOS_INFO;
1423 ist_info = IST_INFO;
1424 saved_videomode = VIDEO_MODE;
1425 if( SYS_DESC_TABLE.length != 0 ) {
1426 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1427 machine_id = SYS_DESC_TABLE.table[0];
1428 machine_submodel_id = SYS_DESC_TABLE.table[1];
1429 BIOS_revision = SYS_DESC_TABLE.table[2];
1431 bootloader_type = LOADER_TYPE;
1433 #ifdef CONFIG_BLK_DEV_RAM
1434 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1435 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1436 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1442 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1443 print_memory_map(machine_specific_memory_setup());
1448 if (!MOUNT_ROOT_RDONLY)
1449 root_mountflags &= ~MS_RDONLY;
1450 init_mm.start_code = (unsigned long) _text;
1451 init_mm.end_code = (unsigned long) _etext;
1452 init_mm.end_data = (unsigned long) _edata;
1453 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1455 code_resource.start = virt_to_phys(_text);
1456 code_resource.end = virt_to_phys(_etext)-1;
1457 data_resource.start = virt_to_phys(_etext);
1458 data_resource.end = virt_to_phys(_edata)-1;
1460 parse_cmdline_early(cmdline_p);
1462 max_low_pfn = setup_memory();
1465 * NOTE: before this point _nobody_ is allowed to allocate
1466 * any memory using the bootmem allocator. Although the
1467 * alloctor is now initialised only the first 8Mb of the kernel
1468 * virtual address space has been mapped. All allocations before
1469 * paging_init() has completed must use the alloc_bootmem_low_pages()
1470 * variant (which allocates DMA'able memory) and care must be taken
1471 * not to exceed the 8Mb limit.
1475 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1478 remapped_pgdat_init();
1483 * NOTE: at this point the bootmem allocator is fully available.
/* Optional early console: look for earlyprintk= on the command line and,
   through setup_early_printk(), bring the early console up. */
1486 #ifdef CONFIG_EARLY_PRINTK
1488 char *s = strstr(*cmdline_p, "earlyprintk=");
1490 extern void setup_early_printk(char *);
1492 setup_early_printk(s);
1493 printk("early console enabled\n");
1501 #ifdef CONFIG_X86_GENERICARCH
1502 generic_apic_probe(*cmdline_p);
1507 #ifdef CONFIG_ACPI_BOOT
1509 * Parse the ACPI tables for possible boot-time SMP configuration.
1511 acpi_boot_table_init();
1515 #ifdef CONFIG_X86_LOCAL_APIC
1516 if (smp_found_config)
/* Initial console driver. With CONFIG_VGA_CONSOLE: use vga_con unless EFI
   is enabled and efi_mem_type(0xa0000) is EFI_CONVENTIONAL_MEMORY. Else,
   with CONFIG_DUMMY_CONSOLE: use dummy_con. */
1523 #if defined(CONFIG_VGA_CONSOLE)
1524 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1525 conswitchp = &vga_con;
1526 #elif defined(CONFIG_DUMMY_CONSOLE)
1527 conswitchp = &dummy_con;
1532 #include "setup_arch_post.h"
1536 * c-file-style:"k&r"