/*
 * machine_kexec.c - handle transition of Linux booting another kernel
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
#include <asm/system.h>

#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))

#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L2_ATTR (_PAGE_PRESENT)

#define LEVEL0_SIZE (1UL << 12UL)
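/* LEVEL0_SIZE is one 4 KiB page.  Without PAE a page directory entry
 * maps 4 MiB (1 << 22); with PAE a page directory entry maps 2 MiB
 * (1 << 21) and a page directory pointer entry 1 GiB (1 << 30), which
 * is where the LEVEL1_SIZE/LEVEL2_SIZE values below come from.
 * L2_ATTR is bare _PAGE_PRESENT because the PAE PDPTE format reserves
 * most of the other attribute bits.
 */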

#ifndef CONFIG_X86_PAE
#define LEVEL1_SIZE (1UL << 22UL)
static u32 pgtable_level1[1024] PAGE_ALIGNED;

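/* Map one page of physical memory at a virtual address equal to its
 * physical address.  The relocation stub is entered with paging still
 * enabled, so the control page it runs from must be reachable at its
 * physical address before paging can be switched off.
 */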
static void identity_map_page(unsigned long address)
{
	unsigned long level1_index, level2_index;
	u32 *pgtable_level2;

	/* Find the current page table */
	pgtable_level2 = __va(read_cr3());

	/* Find the indexes of the physical address to identity map */
	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
	level2_index = address / LEVEL1_SIZE;

	/* Identity map the page table entry */
	pgtable_level1[level1_index] = address | L0_ATTR;
	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;

	/* Flush the tlb so the new mapping takes effect.
	 * Global tlb entries are not flushed but that is not an issue.
	 */
	load_cr3(pgtable_level2);
}

#else
#define LEVEL1_SIZE (1UL << 21UL)
#define LEVEL2_SIZE (1UL << 30UL)
static u64 pgtable_level1[512] PAGE_ALIGNED;
static u64 pgtable_level2[512] PAGE_ALIGNED;

static void identity_map_page(unsigned long address)
{
	unsigned long level1_index, level2_index, level3_index;
	u64 *pgtable_level3;

	/* Find the current page table */
	pgtable_level3 = __va(read_cr3());

	/* Find the indexes of the physical address to identity map */
	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
	level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
	level3_index = address / LEVEL2_SIZE;

	/* Identity map the page table entry */
	pgtable_level1[level1_index] = address | L0_ATTR;
	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
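	/* set_64bit() stores the 64-bit PAE entry atomically; a plain
	 * assignment would compile to two 32-bit writes, and the MMU
	 * could walk a half-updated entry in between.
	 */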
	set_64bit(&pgtable_level3[level3_index],
		  __pa(pgtable_level2) | L2_ATTR);

	/* Flush the tlb so the new mapping takes effect.
	 * Global tlb entries are not flushed but that is not an issue.
	 */
	load_cr3(pgtable_level3);
}
#endif

static void set_idt(void *newidt, __u16 limit)
{
	struct Xgt_desc_struct curidt;

	/* ia32 supports unaligned loads & stores */
	curidt.size = limit;
	curidt.address = (unsigned long)newidt;

	load_idt(&curidt);
}

static void set_gdt(void *newgdt, __u16 limit)
{
	struct Xgt_desc_struct curgdt;

	/* ia32 supports unaligned loads & stores */
	curgdt.size = limit;
	curgdt.address = (unsigned long)newgdt;

	load_gdt(&curgdt);
}
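
/* Both loads go through a 6-byte pseudo-descriptor: a 16-bit limit
 * followed by a 32-bit linear base, which is the layout of struct
 * Xgt_desc_struct.  That is what the "unaligned" remarks refer to:
 * the 32-bit base field sits at offset 2.
 */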

static void load_segments(void)
{
#define __STR(X) #X
#define STR(X) __STR(X)

	__asm__ __volatile__ (
		"\tljmp $"STR(__KERNEL_CS)",$1f\n"
		"\t1:\n"
		"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
		"\tmovl %%eax,%%ds\n"
		"\tmovl %%eax,%%es\n"
		"\tmovl %%eax,%%fs\n"
		"\tmovl %%eax,%%gs\n"
		"\tmovl %%eax,%%ss\n"
		::: "eax", "memory");
#undef STR
#undef __STR
}
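
/* %cs cannot be written with a mov; the ljmp above is what reloads it
 * (and its hidden descriptor cache).  "1f"/"1:" is a GNU as local
 * label, so the jump simply lands on the next instruction with the
 * new selector in place.
 */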

typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
					unsigned long indirection_page,
					unsigned long reboot_code_buffer,
					unsigned long start_address,
					unsigned int has_pae) ATTRIB_NORET;

extern const unsigned char relocate_new_kernel[];
extern void relocate_new_kernel_end(void);
extern const unsigned int relocate_new_kernel_size;

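/* relocate_new_kernel is the position-independent assembly stub (from
 * relocate_kernel.S) that gets copied into the control page: it walks
 * the indirection page list, copies the new kernel's segments into
 * place and jumps to start_address.  has_pae tells it whether PAE is
 * active so it can shut paging down correctly.
 */
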
/*
 * An architecture hook called to validate the
 * proposed image and prepare the control pages
 * as needed.  The pages for KEXEC_CONTROL_CODE_SIZE
 * have been allocated, but the segments have not yet
 * been copied into the kernel.
 *
 * Do whatever setup is needed on the image and the
 * reboot code buffer to allow us to avoid allocations
 * later.
 *
 * Currently nothing.
 */
int machine_kexec_prepare(struct kimage *image)
{
	return 0;
}

/*
 * Undo anything leftover by machine_kexec_prepare
 * when an image is freed.
 */
void machine_kexec_cleanup(struct kimage *image)
{
}

/*
 * Do not allocate memory (or fail in any way) in machine_kexec().
 * We are past the point of no return, committed to rebooting now.
 */
NORET_TYPE void machine_kexec(struct kimage *image)
{
	unsigned long page_list;
	unsigned long reboot_code_buffer;

	relocate_new_kernel_t rnk;

	/* Interrupts aren't acceptable while we reboot */
	local_irq_disable();

	/* Compute some offsets */
	reboot_code_buffer = page_to_pfn(image->control_code_page)
								<< PAGE_SHIFT;
	page_list = image->head;

	/* Set up an identity mapping for the reboot_code_buffer */
	identity_map_page(reboot_code_buffer);

	/* copy it out */
	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
						relocate_new_kernel_size);

	/* The segment registers are funny things, they have both a
	 * visible and an invisible part.  Whenever the visible part is
	 * set to a specific selector, the invisible part is loaded
	 * from a table in memory.  At no other time is the
	 * descriptor table in memory accessed.
	 *
	 * I take advantage of this here by force loading the
	 * segments, before I zap the gdt with an invalid value.
	 */
	load_segments();
	/* The gdt & idt are now invalid.
	 * If you want to load them you must set up your own idt & gdt.
	 */
	set_gdt(phys_to_virt(0), 0);
	set_idt(phys_to_virt(0), 0);
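
	/* The live gdt and idt sat in the old kernel's memory, which the
	 * relocation stub may overwrite; after the force-loads above,
	 * nothing needs to read them again.
	 */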

	/* now call it */
	rnk = (relocate_new_kernel_t) reboot_code_buffer;
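	/* The function pointer holds a physical address; this call only
	 * works because identity_map_page() made virtual == physical
	 * for the control page.
	 */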
	(*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
}

/* crashkernel=size@addr specifies the location to reserve for
 * a crash kernel.  By reserving this memory we guarantee
 * that linux never sets it up as a DMA target.
 * Useful for holding code to do something appropriate
 * after a kernel panic.
 */
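/* For example, booting with "crashkernel=64M@16M" reserves 64 MiB of
 * RAM at physical address 16 MiB; memparse() accepts the usual
 * K/M/G size suffixes.
 */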
static int __init parse_crashkernel(char *arg)
{
	unsigned long size, base;
	size = memparse(arg, &arg);
	if (*arg == '@') {
		base = memparse(arg+1, &arg);
		/* FIXME: Do I want a sanity check
		 * to validate the memory range?
		 */
		crashk_res.start = base;
		crashk_res.end   = base + size - 1;
	}
	return 0;
}
early_param("crashkernel", parse_crashkernel);