blob: 466450167dead2162cb4b35c19593ecf4d358b26 [file] [log] [blame]
/*
 * handle transition of Linux booting another kernel
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */
8
9#include <linux/mm.h>
10#include <linux/kexec.h>
11#include <linux/delay.h>
Rusty Russell1a3f2392006-09-26 10:52:32 +020012#include <linux/init.h>
Ken'ichi Ohmichifd59d232007-10-16 23:27:27 -070013#include <linux/numa.h>
Ingo Molnarf43fdad2008-05-12 21:20:43 +020014#include <linux/ftrace.h>
15
Eric W. Biederman5033cba2005-06-25 14:57:56 -070016#include <asm/pgtable.h>
17#include <asm/pgalloc.h>
18#include <asm/tlbflush.h>
19#include <asm/mmu_context.h>
20#include <asm/io.h>
21#include <asm/apic.h>
22#include <asm/cpufeature.h>
Eric W. Biedermane7b47cc2005-07-29 13:01:18 -060023#include <asm/desc.h>
Zachary Amsden4bb0d3e2005-09-03 15:56:36 -070024#include <asm/system.h>
Huang Ying3ab83522008-07-25 19:45:07 -070025#include <asm/cacheflush.h>
Eric W. Biederman5033cba2005-06-25 14:57:56 -070026
27#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
Magnus Damm35665612006-09-26 10:52:38 +020028static u32 kexec_pgd[1024] PAGE_ALIGNED;
29#ifdef CONFIG_X86_PAE
30static u32 kexec_pmd0[1024] PAGE_ALIGNED;
31static u32 kexec_pmd1[1024] PAGE_ALIGNED;
Eric W. Biederman5033cba2005-06-25 14:57:56 -070032#endif
Magnus Damm35665612006-09-26 10:52:38 +020033static u32 kexec_pte0[1024] PAGE_ALIGNED;
34static u32 kexec_pte1[1024] PAGE_ALIGNED;
Eric W. Biederman5033cba2005-06-25 14:57:56 -070035
Eric W. Biederman5033cba2005-06-25 14:57:56 -070036static void set_idt(void *newidt, __u16 limit)
37{
Glauber de Oliveira Costa6b68f012008-01-30 13:31:12 +010038 struct desc_ptr curidt;
Eric W. Biederman5033cba2005-06-25 14:57:56 -070039
40 /* ia32 supports unaliged loads & stores */
Eric W. Biedermane7b47cc2005-07-29 13:01:18 -060041 curidt.size = limit;
42 curidt.address = (unsigned long)newidt;
Eric W. Biederman5033cba2005-06-25 14:57:56 -070043
Zachary Amsdenf2ab4462005-09-03 15:56:42 -070044 load_idt(&curidt);
WANG Cong378fc6e2008-06-24 16:21:18 +010045}
Eric W. Biederman5033cba2005-06-25 14:57:56 -070046
47
48static void set_gdt(void *newgdt, __u16 limit)
49{
Glauber de Oliveira Costa6b68f012008-01-30 13:31:12 +010050 struct desc_ptr curgdt;
Eric W. Biederman5033cba2005-06-25 14:57:56 -070051
52 /* ia32 supports unaligned loads & stores */
Eric W. Biedermane7b47cc2005-07-29 13:01:18 -060053 curgdt.size = limit;
54 curgdt.address = (unsigned long)newgdt;
Eric W. Biederman5033cba2005-06-25 14:57:56 -070055
Zachary Amsdenf2ab4462005-09-03 15:56:42 -070056 load_gdt(&curgdt);
WANG Cong378fc6e2008-06-24 16:21:18 +010057}
Eric W. Biederman5033cba2005-06-25 14:57:56 -070058
/*
 * Reload every segment register with the kernel selectors.
 *
 * A far jump to the next instruction reloads %cs with __KERNEL_CS;
 * %ds, %es, %fs, %gs and %ss are then loaded with __KERNEL_DS via %eax.
 */
static void load_segments(void)
{
/* Two levels so the selector macro is expanded before being stringified. */
#define __STR(X) #X
#define STR(X) __STR(X)

	__asm__ __volatile__ (
		"\tljmp $"STR(__KERNEL_CS)",$1f\n"
		"\t1:\n"
		"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
		"\tmovl %%eax,%%ds\n"
		"\tmovl %%eax,%%es\n"
		"\tmovl %%eax,%%fs\n"
		"\tmovl %%eax,%%gs\n"
		"\tmovl %%eax,%%ss\n"
		::: "eax", "memory");
#undef STR
#undef __STR
}
77
Eric W. Biederman5033cba2005-06-25 14:57:56 -070078/*
79 * A architecture hook called to validate the
80 * proposed image and prepare the control pages
Huang Ying163f6872008-08-15 00:40:22 -070081 * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE
Eric W. Biederman5033cba2005-06-25 14:57:56 -070082 * have been allocated, but the segments have yet
83 * been copied into the kernel.
84 *
85 * Do what every setup is needed on image and the
86 * reboot code buffer to allow us to avoid allocations
87 * later.
88 *
Huang Ying3ab83522008-07-25 19:45:07 -070089 * Make control page executable.
Eric W. Biederman5033cba2005-06-25 14:57:56 -070090 */
91int machine_kexec_prepare(struct kimage *image)
92{
Huang Ying3ab83522008-07-25 19:45:07 -070093 if (nx_enabled)
94 set_pages_x(image->control_code_page, 1);
Eric W. Biederman5033cba2005-06-25 14:57:56 -070095 return 0;
96}
97
98/*
99 * Undo anything leftover by machine_kexec_prepare
100 * when an image is freed.
101 */
102void machine_kexec_cleanup(struct kimage *image)
103{
Huang Ying3ab83522008-07-25 19:45:07 -0700104 if (nx_enabled)
105 set_pages_nx(image->control_code_page, 1);
Eric W. Biederman5033cba2005-06-25 14:57:56 -0700106}
107
108/*
109 * Do not allocate memory (or fail in any way) in machine_kexec().
110 * We are past the point of no return, committed to rebooting now.
111 */
Huang Ying3ab83522008-07-25 19:45:07 -0700112void machine_kexec(struct kimage *image)
Eric W. Biederman5033cba2005-06-25 14:57:56 -0700113{
Magnus Damm35665612006-09-26 10:52:38 +0200114 unsigned long page_list[PAGES_NR];
115 void *control_page;
Huang Ying3ab83522008-07-25 19:45:07 -0700116 asmlinkage unsigned long
117 (*relocate_kernel_ptr)(unsigned long indirection_page,
118 unsigned long control_page,
119 unsigned long start_address,
120 unsigned int has_pae,
121 unsigned int preserve_context);
Eric W. Biederman5033cba2005-06-25 14:57:56 -0700122
Ingo Molnarf43fdad2008-05-12 21:20:43 +0200123 tracer_disable();
124
Eric W. Biederman5033cba2005-06-25 14:57:56 -0700125 /* Interrupts aren't acceptable while we reboot */
126 local_irq_disable();
127
Huang Ying89081d12008-07-25 19:45:10 -0700128 if (image->preserve_context) {
129#ifdef CONFIG_X86_IO_APIC
130 /* We need to put APICs in legacy mode so that we can
131 * get timer interrupts in second kernel. kexec/kdump
132 * paths already have calls to disable_IO_APIC() in
133 * one form or other. kexec jump path also need
134 * one.
135 */
136 disable_IO_APIC();
137#endif
138 }
139
Magnus Damm35665612006-09-26 10:52:38 +0200140 control_page = page_address(image->control_code_page);
Huang Ying3ab83522008-07-25 19:45:07 -0700141 memcpy(control_page, relocate_kernel, PAGE_SIZE/2);
Eric W. Biederman5033cba2005-06-25 14:57:56 -0700142
Huang Ying3ab83522008-07-25 19:45:07 -0700143 relocate_kernel_ptr = control_page;
Magnus Damm35665612006-09-26 10:52:38 +0200144 page_list[PA_CONTROL_PAGE] = __pa(control_page);
Huang Ying3ab83522008-07-25 19:45:07 -0700145 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
Magnus Damm35665612006-09-26 10:52:38 +0200146 page_list[PA_PGD] = __pa(kexec_pgd);
147 page_list[VA_PGD] = (unsigned long)kexec_pgd;
148#ifdef CONFIG_X86_PAE
149 page_list[PA_PMD_0] = __pa(kexec_pmd0);
150 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
151 page_list[PA_PMD_1] = __pa(kexec_pmd1);
152 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
153#endif
154 page_list[PA_PTE_0] = __pa(kexec_pte0);
155 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
156 page_list[PA_PTE_1] = __pa(kexec_pte1);
157 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
Huang Ying3ab83522008-07-25 19:45:07 -0700158 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT);
Eric W. Biederman5033cba2005-06-25 14:57:56 -0700159
Eric W. Biederman2a8a3d52006-07-30 03:03:20 -0700160 /* The segment registers are funny things, they have both a
161 * visible and an invisible part. Whenever the visible part is
162 * set to a specific selector, the invisible part is loaded
163 * with from a table in memory. At no other time is the
164 * descriptor table in memory accessed.
Eric W. Biederman5033cba2005-06-25 14:57:56 -0700165 *
166 * I take advantage of this here by force loading the
167 * segments, before I zap the gdt with an invalid value.
168 */
169 load_segments();
170 /* The gdt & idt are now invalid.
171 * If you want to load them you must set up your own idt & gdt.
172 */
173 set_gdt(phys_to_virt(0),0);
174 set_idt(phys_to_virt(0),0);
175
176 /* now call it */
Huang Ying3ab83522008-07-25 19:45:07 -0700177 image->start = relocate_kernel_ptr((unsigned long)image->head,
178 (unsigned long)page_list,
179 image->start, cpu_has_pae,
180 image->preserve_context);
Eric W. Biederman5033cba2005-06-25 14:57:56 -0700181}
Rusty Russell1a3f2392006-09-26 10:52:32 +0200182
/*
 * Record architecture-specific entries through the VMCOREINFO_* macros:
 * the node_data symbol and its length (MAX_NUMNODES) on NUMA builds,
 * and the X86_PAE config flag on PAE builds.
 */
void arch_crash_save_vmcoreinfo(void)
{
#ifdef CONFIG_NUMA
	VMCOREINFO_SYMBOL(node_data);
	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
#endif
#ifdef CONFIG_X86_PAE
	VMCOREINFO_CONFIG(X86_PAE);
#endif
}
193