Merge branch 'linus' into cpus4096
[linux-2.6.git] / arch / x86 / xen / smp.c
1 /*
2  * Xen SMP support
3  *
4  * This file implements the Xen versions of smp_ops.  SMP under Xen is
5  * very straightforward.  Bringing a CPU up is simply a matter of
6  * loading its initial context and setting it running.
7  *
8  * IPIs are handled through the Xen event mechanism.
9  *
10  * Because virtual CPUs can be scheduled onto any real CPU, there's no
11  * useful topology information for the kernel to make use of.  As a
12  * result, all CPUs are treated as if they're single-core and
13  * single-threaded.
14  *
15  * This does not handle HOTPLUG_CPU yet.
16  */
17 #include <linux/sched.h>
18 #include <linux/err.h>
19 #include <linux/smp.h>
20
21 #include <asm/paravirt.h>
22 #include <asm/desc.h>
23 #include <asm/pgtable.h>
24 #include <asm/cpu.h>
25
26 #include <xen/interface/xen.h>
27 #include <xen/interface/vcpu.h>
28
29 #include <asm/xen/interface.h>
30 #include <asm/xen/hypercall.h>
31
32 #include <xen/page.h>
33 #include <xen/events.h>
34
35 #include "xen-ops.h"
36 #include "mmu.h"
37
38 cpumask_t xen_cpu_initialized_map;
39
40 static DEFINE_PER_CPU(int, resched_irq);
41 static DEFINE_PER_CPU(int, callfunc_irq);
42 static DEFINE_PER_CPU(int, callfuncsingle_irq);
43 static DEFINE_PER_CPU(int, debug_irq) = -1;
44
45 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
46 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
47
48 /*
49  * Reschedule call back. Nothing to do,
50  * all the work is done automatically when
51  * we return from the interrupt.
52  */
53 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
54 {
55 #ifdef CONFIG_X86_32
56         __get_cpu_var(irq_stat).irq_resched_count++;
57 #else
58         add_pda(irq_resched_count, 1);
59 #endif
60
61         return IRQ_HANDLED;
62 }
63
64 static __cpuinit void cpu_bringup_and_idle(void)
65 {
66         int cpu = smp_processor_id();
67
68         cpu_init();
69         xen_enable_sysenter();
70
71         preempt_disable();
72         per_cpu(cpu_state, cpu) = CPU_ONLINE;
73
74         xen_setup_cpu_clockevents();
75
76         /* We can take interrupts now: we're officially "up". */
77         local_irq_enable();
78
79         wmb();                  /* make sure everything is out */
80         cpu_idle();
81 }
82
83 static int xen_smp_intr_init(unsigned int cpu)
84 {
85         int rc;
86         const char *resched_name, *callfunc_name, *debug_name;
87
88         resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
89         rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
90                                     cpu,
91                                     xen_reschedule_interrupt,
92                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
93                                     resched_name,
94                                     NULL);
95         if (rc < 0)
96                 goto fail;
97         per_cpu(resched_irq, cpu) = rc;
98
99         callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
100         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
101                                     cpu,
102                                     xen_call_function_interrupt,
103                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
104                                     callfunc_name,
105                                     NULL);
106         if (rc < 0)
107                 goto fail;
108         per_cpu(callfunc_irq, cpu) = rc;
109
110         debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
111         rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
112                                      IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
113                                      debug_name, NULL);
114         if (rc < 0)
115                 goto fail;
116         per_cpu(debug_irq, cpu) = rc;
117
118         callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
119         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
120                                     cpu,
121                                     xen_call_function_single_interrupt,
122                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
123                                     callfunc_name,
124                                     NULL);
125         if (rc < 0)
126                 goto fail;
127         per_cpu(callfuncsingle_irq, cpu) = rc;
128
129         return 0;
130
131  fail:
132         if (per_cpu(resched_irq, cpu) >= 0)
133                 unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
134         if (per_cpu(callfunc_irq, cpu) >= 0)
135                 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
136         if (per_cpu(debug_irq, cpu) >= 0)
137                 unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
138         if (per_cpu(callfuncsingle_irq, cpu) >= 0)
139                 unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
140
141         return rc;
142 }
143
144 void __init xen_fill_possible_map(void)
145 {
146         int i, rc;
147
148         for (i = 0; i < NR_CPUS; i++) {
149                 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
150                 if (rc >= 0)
151                         cpu_set(i, cpu_possible_map);
152         }
153 }
154
155 void __init xen_smp_prepare_boot_cpu(void)
156 {
157         int cpu;
158
159         BUG_ON(smp_processor_id() != 0);
160         native_smp_prepare_boot_cpu();
161
162         /* We've switched to the "real" per-cpu gdt, so make sure the
163            old memory can be recycled */
164         make_lowmem_page_readwrite(&per_cpu__gdt_page);
165
166         for_each_possible_cpu(cpu) {
167                 cpus_clear(per_cpu(cpu_sibling_map, cpu));
168                 /*
169                  * cpu_core_map lives in a per cpu area that is cleared
170                  * when the per cpu array is allocated.
171                  *
172                  * cpus_clear(per_cpu(cpu_core_map, cpu));
173                  */
174         }
175
176         xen_setup_vcpu_info_placement();
177 }
178
179 void __init xen_smp_prepare_cpus(unsigned int max_cpus)
180 {
181         unsigned cpu;
182
183         for_each_possible_cpu(cpu) {
184                 cpus_clear(per_cpu(cpu_sibling_map, cpu));
185                 /*
186                  * cpu_core_ map will be zeroed when the per
187                  * cpu area is allocated.
188                  *
189                  * cpus_clear(per_cpu(cpu_core_map, cpu));
190                  */
191         }
192
193         smp_store_cpu_info(0);
194         set_cpu_sibling_map(0);
195
196         if (xen_smp_intr_init(0))
197                 BUG();
198
199         xen_cpu_initialized_map = cpumask_of_cpu(0);
200
201         /* Restrict the possible_map according to max_cpus. */
202         while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
203                 for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
204                         continue;
205                 cpu_clear(cpu, cpu_possible_map);
206         }
207
208         for_each_possible_cpu (cpu) {
209                 struct task_struct *idle;
210
211                 if (cpu == 0)
212                         continue;
213
214                 idle = fork_idle(cpu);
215                 if (IS_ERR(idle))
216                         panic("failed fork for CPU %d", cpu);
217
218                 cpu_set(cpu, cpu_present_map);
219         }
220
221         //init_xenbus_allowed_cpumask();
222 }
223
224 static __cpuinit int
225 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
226 {
227         struct vcpu_guest_context *ctxt;
228         struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
229
230         if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
231                 return 0;
232
233         ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
234         if (ctxt == NULL)
235                 return -ENOMEM;
236
237         ctxt->flags = VGCF_IN_KERNEL;
238         ctxt->user_regs.ds = __USER_DS;
239         ctxt->user_regs.es = __USER_DS;
240         ctxt->user_regs.fs = __KERNEL_PERCPU;
241         ctxt->user_regs.gs = 0;
242         ctxt->user_regs.ss = __KERNEL_DS;
243         ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
244         ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
245
246         memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
247
248         xen_copy_trap_info(ctxt->trap_ctxt);
249
250         ctxt->ldt_ents = 0;
251
252         BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
253         make_lowmem_page_readonly(gdt->gdt);
254
255         ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
256         ctxt->gdt_ents      = ARRAY_SIZE(gdt->gdt);
257
258         ctxt->user_regs.cs = __KERNEL_CS;
259         ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
260
261         ctxt->kernel_ss = __KERNEL_DS;
262         ctxt->kernel_sp = idle->thread.sp0;
263
264         ctxt->event_callback_cs     = __KERNEL_CS;
265         ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
266         ctxt->failsafe_callback_cs  = __KERNEL_CS;
267         ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
268
269         per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
270         ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
271
272         if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
273                 BUG();
274
275         kfree(ctxt);
276         return 0;
277 }
278
279 int __cpuinit xen_cpu_up(unsigned int cpu)
280 {
281         struct task_struct *idle = idle_task(cpu);
282         int rc;
283
284 #if 0
285         rc = cpu_up_check(cpu);
286         if (rc)
287                 return rc;
288 #endif
289
290         init_gdt(cpu);
291         per_cpu(current_task, cpu) = idle;
292         irq_ctx_init(cpu);
293         xen_setup_timer(cpu);
294
295         /* make sure interrupts start blocked */
296         per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
297
298         rc = cpu_initialize_context(cpu, idle);
299         if (rc)
300                 return rc;
301
302         if (num_online_cpus() == 1)
303                 alternatives_smp_switch(1);
304
305         rc = xen_smp_intr_init(cpu);
306         if (rc)
307                 return rc;
308
309         smp_store_cpu_info(cpu);
310         set_cpu_sibling_map(cpu);
311         /* This must be done before setting cpu_online_map */
312         wmb();
313
314         cpu_set(cpu, cpu_online_map);
315
316         rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
317         BUG_ON(rc);
318
319         return 0;
320 }
321
/*
 * Intentionally empty: nothing is done here once the CPUs are up.
 * @max_cpus is unused.
 */
void xen_smp_cpus_done(unsigned int max_cpus)
{
	/* nothing to do */
}
325
326 static void stop_self(void *v)
327 {
328         int cpu = smp_processor_id();
329
330         /* make sure we're not pinning something down */
331         load_cr3(swapper_pg_dir);
332         /* should set up a minimal gdt */
333
334         HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
335         BUG();
336 }
337
338 void xen_smp_send_stop(void)
339 {
340         smp_call_function(stop_self, NULL, 0);
341 }
342
343 void xen_smp_send_reschedule(int cpu)
344 {
345         xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
346 }
347
348 static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
349 {
350         unsigned cpu;
351
352         cpus_and(mask, mask, cpu_online_map);
353
354         for_each_cpu_mask_nr(cpu, mask)
355                 xen_send_IPI_one(cpu, vector);
356 }
357
358 void xen_smp_send_call_function_ipi(cpumask_t mask)
359 {
360         int cpu;
361
362         xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
363
364         /* Make sure other vcpus get a chance to run if they need to. */
365         for_each_cpu_mask_nr(cpu, mask) {
366                 if (xen_vcpu_stolen(cpu)) {
367                         HYPERVISOR_sched_op(SCHEDOP_yield, 0);
368                         break;
369                 }
370         }
371 }
372
373 void xen_smp_send_call_function_single_ipi(int cpu)
374 {
375         xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
376 }
377
378 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
379 {
380         irq_enter();
381         generic_smp_call_function_interrupt();
382         __get_cpu_var(irq_stat).irq_call_count++;
383         irq_exit();
384
385         return IRQ_HANDLED;
386 }
387
388 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
389 {
390         irq_enter();
391         generic_smp_call_function_single_interrupt();
392         __get_cpu_var(irq_stat).irq_call_count++;
393         irq_exit();
394
395         return IRQ_HANDLED;
396 }