nv-tegra.nvidia Code Review - linux-3.10.git/blob - arch/blackfin/mach-common/smp.c
blackfin: smp: fix msg queue overflow issue
[linux-3.10.git] / arch / blackfin / mach-common / smp.c
1 /*
2  * IPI management based on arch/arm/kernel/smp.c (Copyright 2002 ARM Limited)
3  *
4  * Copyright 2007-2009 Analog Devices Inc.
5  *                         Philippe Gerum <rpm@xenomai.org>
6  *
7  * Licensed under the GPL-2.
8  */
9
10 #include <linux/module.h>
11 #include <linux/delay.h>
12 #include <linux/init.h>
13 #include <linux/spinlock.h>
14 #include <linux/sched.h>
15 #include <linux/interrupt.h>
16 #include <linux/cache.h>
17 #include <linux/clockchips.h>
18 #include <linux/profile.h>
19 #include <linux/errno.h>
20 #include <linux/mm.h>
21 #include <linux/cpu.h>
22 #include <linux/smp.h>
23 #include <linux/cpumask.h>
24 #include <linux/seq_file.h>
25 #include <linux/irq.h>
26 #include <linux/slab.h>
27 #include <linux/atomic.h>
28 #include <asm/cacheflush.h>
29 #include <asm/irq_handler.h>
30 #include <asm/mmu_context.h>
31 #include <asm/pgtable.h>
32 #include <asm/pgalloc.h>
33 #include <asm/processor.h>
34 #include <asm/ptrace.h>
35 #include <asm/cpu.h>
36 #include <asm/time.h>
37 #include <linux/err.h>
38
39 /*
40  * Anomaly notes:
41  * 05000120 - we always define corelock as 32-bit integer in L2
42  */
43 struct corelock_slot corelock __attribute__ ((__section__(".l2.bss"))); /* placed in the .l2.bss section */
44
45 #ifdef CONFIG_ICACHE_FLUSH_L1
46 unsigned long blackfin_iflush_l1_entry[NR_CPUS]; /* one slot per CPU; only built with CONFIG_ICACHE_FLUSH_L1 */
47 #endif
48
49 struct blackfin_initial_pda __cpuinitdata initial_pda_coreb; /* read by secondary_start_kernel() when reporting a CoreB double fault */
50
51 #define BFIN_IPI_TIMER        0 /* ipi_handler_int1() calls ipi_timer() */
52 #define BFIN_IPI_RESCHEDULE   1 /* ipi_handler_int1() calls scheduler_ipi() */
53 #define BFIN_IPI_CALL_FUNC    2 /* ipi_handler_int1() calls ipi_call_function() */
54 #define BFIN_IPI_CPU_STOP     3 /* ipi_handler_int1() calls ipi_cpu_stop() */
55
56 struct blackfin_flush_data { /* argument block passed to ipi_flush_icache() */
57         unsigned long start; /* first argument to blackfin_icache_flush_range() */
58         unsigned long end; /* second argument to blackfin_icache_flush_range() */
59 };
60
61 void *secondary_stack; /* __cpu_up() points this past the idle task's stack page while booting a secondary, then resets it to NULL */
62
63
64 struct smp_call_struct { /* payload of a BFIN_IPI_CALL_FUNC message */
65         void (*func)(void *info); /* function run by ipi_call_function() on the target CPU */
66         void *info; /* argument handed to func */
67         int wait; /* non-zero: target clears its bit in *waitmask after func returns */
68         cpumask_t *waitmask; /* points at the sender's on-stack mask in smp_send_message() */
69 };
70
71 static struct blackfin_flush_data smp_flush_data; /* filled in by smp_icache_flush_range_others() */
72
73 static DEFINE_SPINLOCK(stop_lock); /* held around the printk/dump_stack in ipi_cpu_stop() */
74
75 struct ipi_message { /* one queued inter-processor message */
76         unsigned long type; /* one of the BFIN_IPI_* values */
77         struct smp_call_struct call_struct; /* only filled in for BFIN_IPI_CALL_FUNC */
78 };
79
80 /* A magic number - stress test shows this is safe for common cases */
81 #define BFIN_IPI_MSGQ_LEN 5
82
83 /* Simple FIFO ring buffer; smp_send_message() panics when it is full */
84 struct ipi_message_queue {
85         spinlock_t lock; /* taken by both the sender and ipi_handler_int1() */
86         unsigned long count; /* number of messages currently queued */
87         unsigned long head; /* index of the oldest queued message */
88         struct ipi_message ipi_message[BFIN_IPI_MSGQ_LEN]; /* slot (head + count) % BFIN_IPI_MSGQ_LEN is filled next */
89 };
90
91 static DEFINE_PER_CPU(struct ipi_message_queue, ipi_msg_queue); /* one incoming queue per CPU, set up by ipi_queue_init() */
92
93 static void ipi_cpu_stop(unsigned int cpu)
94 {
95         spin_lock(&stop_lock);
96         printk(KERN_CRIT "CPU%u: stopping\n", cpu);
97         dump_stack();
98         spin_unlock(&stop_lock);
99
100         set_cpu_online(cpu, false);
101
102         local_irq_disable();
103
104         while (1)
105                 SSYNC();
106 }
107
108 static void ipi_flush_icache(void *info)
109 {
110         struct blackfin_flush_data *fdata = info;
111
112         /* Invalidate the memory holding the bounds of the flushed region. */
113         blackfin_dcache_invalidate_range((unsigned long)fdata,
114                                          (unsigned long)fdata + sizeof(*fdata));
115
116         /* Make sure all write buffers in the data side of the core
117          * are flushed before trying to invalidate the icache.  This
118          * needs to be after the data flush and before the icache
119          * flush so that the SSYNC does the right thing in preventing
120          * the instruction prefetcher from hitting things in cached
121          * memory at the wrong time -- it runs much further ahead than
122          * the pipeline.
123          */
124         SSYNC();
125
126         /* ipi_flaush_icache is invoked by generic flush_icache_range,
127          * so call blackfin arch icache flush directly here.
128          */
129         blackfin_icache_flush_range(fdata->start, fdata->end);
130 }
131
132 static void ipi_call_function(unsigned int cpu, struct ipi_message *msg)
133 {
134         int wait;
135         void (*func)(void *info);
136         void *info;
137         func = msg->call_struct.func;
138         info = msg->call_struct.info;
139         wait = msg->call_struct.wait;
140         func(info);
141         if (wait) {
142 #ifdef __ARCH_SYNC_CORE_DCACHE
143                 /*
144                  * 'wait' usually means synchronization between CPUs.
145                  * Invalidate D cache in case shared data was changed
146                  * by func() to ensure cache coherence.
147                  */
148                 resync_core_dcache();
149 #endif
150                 cpumask_clear_cpu(cpu, msg->call_struct.waitmask);
151         }
152 }
153
154 /* Use IRQ_SUPPLE_0 to request reschedule.
155  * When returning from interrupt to user space,
156  * there is chance to reschedule */
157 static irqreturn_t ipi_handler_int0(int irq, void *dev_instance)
158 {
159         unsigned int cpu = smp_processor_id();
160
161         platform_clear_ipi(cpu, IRQ_SUPPLE_0);
162         return IRQ_HANDLED;
163 }
164
165 DECLARE_PER_CPU(struct clock_event_device, coretmr_events);
166 void ipi_timer(void)
167 {
168         int cpu = smp_processor_id();
169         struct clock_event_device *evt = &per_cpu(coretmr_events, cpu);
170         evt->event_handler(evt);
171 }
172
173 static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
174 {
175         struct ipi_message *msg;
176         struct ipi_message_queue *msg_queue;
177         unsigned int cpu = smp_processor_id();
178         unsigned long flags;
179
180         platform_clear_ipi(cpu, IRQ_SUPPLE_1);
181
182         msg_queue = &__get_cpu_var(ipi_msg_queue);
183
184         spin_lock_irqsave(&msg_queue->lock, flags);
185
186         while (msg_queue->count) {
187                 msg = &msg_queue->ipi_message[msg_queue->head];
188                 switch (msg->type) {
189                 case BFIN_IPI_TIMER:
190                         ipi_timer();
191                         break;
192                 case BFIN_IPI_RESCHEDULE:
193                         scheduler_ipi();
194                         break;
195                 case BFIN_IPI_CALL_FUNC:
196                         ipi_call_function(cpu, msg);
197                         break;
198                 case BFIN_IPI_CPU_STOP:
199                         ipi_cpu_stop(cpu);
200                         break;
201                 default:
202                         printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n",
203                                cpu, msg->type);
204                         break;
205                 }
206                 msg_queue->head++;
207                 msg_queue->head %= BFIN_IPI_MSGQ_LEN;
208                 msg_queue->count--;
209         }
210         spin_unlock_irqrestore(&msg_queue->lock, flags);
211         return IRQ_HANDLED;
212 }
213
214 static void ipi_queue_init(void)
215 {
216         unsigned int cpu;
217         struct ipi_message_queue *msg_queue;
218         for_each_possible_cpu(cpu) {
219                 msg_queue = &per_cpu(ipi_msg_queue, cpu);
220                 spin_lock_init(&msg_queue->lock);
221                 msg_queue->count = 0;
222                 msg_queue->head = 0;
223         }
224 }
225
226 static inline void smp_send_message(cpumask_t callmap, unsigned long type,
227                                         void (*func) (void *info), void *info, int wait)
228 {
229         unsigned int cpu;
230         struct ipi_message_queue *msg_queue;
231         struct ipi_message *msg;
232         unsigned long flags, next_msg;
233         cpumask_t waitmask; /* waitmask is shared by all cpus */
234
235         cpumask_copy(&waitmask, &callmap);
236         for_each_cpu(cpu, &callmap) {
237                 msg_queue = &per_cpu(ipi_msg_queue, cpu);
238                 spin_lock_irqsave(&msg_queue->lock, flags);
239                 if (msg_queue->count < BFIN_IPI_MSGQ_LEN) {
240                         next_msg = (msg_queue->head + msg_queue->count)
241                                         % BFIN_IPI_MSGQ_LEN;
242                         msg = &msg_queue->ipi_message[next_msg];
243                         msg->type = type;
244                         if (type == BFIN_IPI_CALL_FUNC) {
245                                 msg->call_struct.func = func;
246                                 msg->call_struct.info = info;
247                                 msg->call_struct.wait = wait;
248                                 msg->call_struct.waitmask = &waitmask;
249                         }
250                         msg_queue->count++;
251                 } else
252                         panic("IPI message queue overflow\n");
253                 spin_unlock_irqrestore(&msg_queue->lock, flags);
254                 platform_send_ipi_cpu(cpu, IRQ_SUPPLE_1);
255         }
256
257         if (wait) {
258                 while (!cpumask_empty(&waitmask))
259                         blackfin_dcache_invalidate_range(
260                                 (unsigned long)(&waitmask),
261                                 (unsigned long)(&waitmask));
262 #ifdef __ARCH_SYNC_CORE_DCACHE
263                 /*
264                  * Invalidate D cache in case shared data was changed by
265                  * other processors to ensure cache coherence.
266                  */
267                 resync_core_dcache();
268 #endif
269         }
270 }
271
272 int smp_call_function(void (*func)(void *info), void *info, int wait)
273 {
274         cpumask_t callmap;
275
276         preempt_disable();
277         cpumask_copy(&callmap, cpu_online_mask);
278         cpumask_clear_cpu(smp_processor_id(), &callmap);
279         if (!cpumask_empty(&callmap))
280                 smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
281
282         preempt_enable();
283
284         return 0;
285 }
286 EXPORT_SYMBOL_GPL(smp_call_function);
287
288 int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
289                                 int wait)
290 {
291         unsigned int cpu = cpuid;
292         cpumask_t callmap;
293
294         if (cpu_is_offline(cpu))
295                 return 0;
296         cpumask_clear(&callmap);
297         cpumask_set_cpu(cpu, &callmap);
298
299         smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
300
301         return 0;
302 }
303 EXPORT_SYMBOL_GPL(smp_call_function_single);
304
305 void smp_send_reschedule(int cpu)
306 {
307         cpumask_t callmap;
308         /* simply trigger an ipi */
309
310         cpumask_clear(&callmap);
311         cpumask_set_cpu(cpu, &callmap);
312
313         smp_send_message(callmap, BFIN_IPI_RESCHEDULE, NULL, NULL, 0);
314
315         return;
316 }
317
318 void smp_send_msg(const struct cpumask *mask, unsigned long type)
319 {
320         smp_send_message(*mask, type, NULL, NULL, 0);
321 }
322
323 void smp_timer_broadcast(const struct cpumask *mask)
324 {
325         smp_send_msg(mask, BFIN_IPI_TIMER);
326 }
327
328 void smp_send_stop(void)
329 {
330         cpumask_t callmap;
331
332         preempt_disable();
333         cpumask_copy(&callmap, cpu_online_mask);
334         cpumask_clear_cpu(smp_processor_id(), &callmap);
335         if (!cpumask_empty(&callmap))
336                 smp_send_message(callmap, BFIN_IPI_CPU_STOP, NULL, NULL, 0);
337
338         preempt_enable();
339
340         return;
341 }
342
343 int __cpuinit __cpu_up(unsigned int cpu)
344 {
345         int ret;
346         struct blackfin_cpudata *ci = &per_cpu(cpu_data, cpu);
347         struct task_struct *idle = ci->idle;
348
349         if (idle) {
350                 free_task(idle);
351                 idle = NULL;
352         }
353
354         if (!idle) {
355                 idle = fork_idle(cpu);
356                 if (IS_ERR(idle)) {
357                         printk(KERN_ERR "CPU%u: fork() failed\n", cpu);
358                         return PTR_ERR(idle);
359                 }
360                 ci->idle = idle;
361         } else {
362                 init_idle(idle, cpu);
363         }
364         secondary_stack = task_stack_page(idle) + THREAD_SIZE;
365
366         ret = platform_boot_secondary(cpu, idle);
367
368         secondary_stack = NULL;
369
370         return ret;
371 }
372
373 static void __cpuinit setup_secondary(unsigned int cpu)
374 {
375         unsigned long ilat;
376
377         bfin_write_IMASK(0);
378         CSYNC();
379         ilat = bfin_read_ILAT();
380         CSYNC();
381         bfin_write_ILAT(ilat);
382         CSYNC();
383
384         /* Enable interrupt levels IVG7-15. IARs have been already
385          * programmed by the boot CPU.  */
386         bfin_irq_flags |= IMASK_IVG15 |
387             IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 |
388             IMASK_IVG10 | IMASK_IVG9 | IMASK_IVG8 | IMASK_IVG7 | IMASK_IVGHW;
389 }
390
391 void __cpuinit secondary_start_kernel(void)
392 {
393         unsigned int cpu = smp_processor_id();
394         struct mm_struct *mm = &init_mm;
395
396         if (_bfin_swrst & SWRST_DBL_FAULT_B) {
397                 printk(KERN_EMERG "CoreB Recovering from DOUBLE FAULT event\n");
398 #ifdef CONFIG_DEBUG_DOUBLEFAULT
399                 printk(KERN_EMERG " While handling exception (EXCAUSE = %#x) at %pF\n",
400                         initial_pda_coreb.seqstat_doublefault & SEQSTAT_EXCAUSE,
401                         initial_pda_coreb.retx_doublefault);
402                 printk(KERN_NOTICE "   DCPLB_FAULT_ADDR: %pF\n",
403                         initial_pda_coreb.dcplb_doublefault_addr);
404                 printk(KERN_NOTICE "   ICPLB_FAULT_ADDR: %pF\n",
405                         initial_pda_coreb.icplb_doublefault_addr);
406 #endif
407                 printk(KERN_NOTICE " The instruction at %pF caused a double exception\n",
408                         initial_pda_coreb.retx);
409         }
410
411         /*
412          * We want the D-cache to be enabled early, in case the atomic
413          * support code emulates cache coherence (see
414          * __ARCH_SYNC_CORE_DCACHE).
415          */
416         init_exception_vectors();
417
418         local_irq_disable();
419
420         /* Attach the new idle task to the global mm. */
421         atomic_inc(&mm->mm_users);
422         atomic_inc(&mm->mm_count);
423         current->active_mm = mm;
424
425         preempt_disable();
426
427         setup_secondary(cpu);
428
429         platform_secondary_init(cpu);
430
431         /* setup local core timer */
432         bfin_local_timer_setup();
433
434         local_irq_enable();
435
436         bfin_setup_caches(cpu);
437
438         notify_cpu_starting(cpu);
439         /*
440          * Calibrate loops per jiffy value.
441          * IRQs need to be enabled here - D-cache can be invalidated
442          * in timer irq handler, so core B can read correct jiffies.
443          */
444         calibrate_delay();
445
446         cpu_idle();
447 }
448
449 void __init smp_prepare_boot_cpu(void)
450 {
451 }
452
453 void __init smp_prepare_cpus(unsigned int max_cpus)
454 {
455         platform_prepare_cpus(max_cpus);
456         ipi_queue_init();
457         platform_request_ipi(IRQ_SUPPLE_0, ipi_handler_int0);
458         platform_request_ipi(IRQ_SUPPLE_1, ipi_handler_int1);
459 }
460
461 void __init smp_cpus_done(unsigned int max_cpus)
462 {
463         unsigned long bogosum = 0;
464         unsigned int cpu;
465
466         for_each_online_cpu(cpu)
467                 bogosum += loops_per_jiffy;
468
469         printk(KERN_INFO "SMP: Total of %d processors activated "
470                "(%lu.%02lu BogoMIPS).\n",
471                num_online_cpus(),
472                bogosum / (500000/HZ),
473                (bogosum / (5000/HZ)) % 100);
474 }
475
476 void smp_icache_flush_range_others(unsigned long start, unsigned long end)
477 {
478         smp_flush_data.start = start;
479         smp_flush_data.end = end;
480
481         preempt_disable();
482         if (smp_call_function(&ipi_flush_icache, &smp_flush_data, 1))
483                 printk(KERN_WARNING "SMP: failed to run I-cache flush request on other CPUs\n");
484         preempt_enable();
485 }
486 EXPORT_SYMBOL_GPL(smp_icache_flush_range_others);
487
#ifdef __ARCH_SYNC_CORE_ICACHE
/* Per-CPU count of full I-cache invalidations done by resync_core_icache(). */
unsigned long icache_invld_count[NR_CPUS];

/* Invalidate the entire I-cache of the current CPU and count the event. */
void resync_core_icache(void)
{
        unsigned int this_cpu = get_cpu();

        blackfin_invalidate_entire_icache();
        icache_invld_count[this_cpu] += 1;

        put_cpu();
}
EXPORT_SYMBOL(resync_core_icache);
#endif
499
#ifdef __ARCH_SYNC_CORE_DCACHE
/* Per-CPU count of full D-cache invalidations done by resync_core_dcache(). */
unsigned long dcache_invld_count[NR_CPUS];
/* Placed in the .l2.bss section; not referenced elsewhere in this file. */
unsigned long barrier_mask __attribute__ ((__section__(".l2.bss")));

/* Invalidate the entire D-cache of the current CPU and count the event. */
void resync_core_dcache(void)
{
        unsigned int this_cpu = get_cpu();

        blackfin_invalidate_entire_dcache();
        dcache_invld_count[this_cpu] += 1;

        put_cpu();
}
EXPORT_SYMBOL(resync_core_dcache);
#endif
513
514 #ifdef CONFIG_HOTPLUG_CPU
515 int __cpuexit __cpu_disable(void)
516 {
517         unsigned int cpu = smp_processor_id();
518
519         if (cpu == 0)
520                 return -EPERM;
521
522         set_cpu_online(cpu, false);
523         return 0;
524 }
525
526 static DECLARE_COMPLETION(cpu_killed);
527
528 int __cpuexit __cpu_die(unsigned int cpu)
529 {
530         return wait_for_completion_timeout(&cpu_killed, 5000);
531 }
532
533 void cpu_die(void)
534 {
535         complete(&cpu_killed);
536
537         atomic_dec(&init_mm.mm_users);
538         atomic_dec(&init_mm.mm_count);
539
540         local_irq_disable();
541         platform_cpu_die();
542 }
543 #endif