Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6.git] / kernel / softirq.c
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  *      Distribute under GPLv2.
7  *
8  *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  *
10  *      Remote softirq infrastructure is by Jens Axboe.
11  */
12
13 #include <linux/module.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/tick.h>
27
28 #define CREATE_TRACE_POINTS
29 #include <trace/events/irq.h>
30
31 #include <asm/irq.h>
32 /*
33    - No shared variables, all the data are CPU local.
34    - If a softirq needs serialization, let it serialize itself
35      by its own spinlocks.
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding, though it is
     still not clear whether this results in better locality or not.
40
41    Examples:
42    - NET RX softirq. It is multithreaded and does not require
43      any global serialization.
44    - NET TX softirq. It kicks software netdevice queues, hence
45      it is logically serialized per device, but this serialization
46      is invisible to common code.
47    - Tasklets: serialized wrt itself.
48  */
49
50 #ifndef __ARCH_IRQ_STAT
51 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
52 EXPORT_SYMBOL(irq_stat);
53 #endif
54
55 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
56
57 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58
59 char *softirq_to_name[NR_SOFTIRQS] = {
60         "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
61         "TASKLET", "SCHED", "HRTIMER",  "RCU"
62 };
63
64 /*
65  * we cannot loop indefinitely here to avoid userspace starvation,
66  * but we also don't want to introduce a worst case 1/HZ latency
67  * to the pending events, so lets the scheduler to balance
68  * the softirq load for us.
69  */
70 void wakeup_softirqd(void)
71 {
72         /* Interrupts are disabled: no need to stop preemption */
73         struct task_struct *tsk = __get_cpu_var(ksoftirqd);
74
75         if (tsk && tsk->state != TASK_RUNNING)
76                 wake_up_process(tsk);
77 }
78
79 /*
80  * preempt_count and SOFTIRQ_OFFSET usage:
81  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
82  *   softirq processing.
83  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
84  *   on local_bh_disable or local_bh_enable.
85  * This lets us distinguish between whether we are currently processing
86  * softirq and whether we just have bh disabled.
87  */
88
/*
 * __local_bh_disable - raise preempt_count by @cnt on behalf of softirq code
 * @ip:  caller address handed to the softirq tracer
 * @cnt: amount added to preempt_count
 *
 * For softirq.c-internal use, where hardirqs are disabled legitimately.
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
	unsigned long irqflags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(irqflags);
	/*
	 * add_preempt_count() is hooked by the preempt tracer, which would
	 * call back into lockdep after SOFTIRQ_OFFSET is set but before
	 * current->softirq_enabled is cleared, and so break lockdep.
	 * Bump preempt_count by hand and emit trace_preempt_off() ourselves
	 * further down.
	 */
	preempt_count() += cnt;
	/* Did the increment above just turn softirqs off? */
	if (softirq_count() == cnt)
		trace_softirqs_off(ip);
	raw_local_irq_restore(irqflags);

	if (preempt_count() == cnt)
		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
	/* No tracing to do: @ip is unused in this variant. */
	add_preempt_count(cnt);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */
126
127 void local_bh_disable(void)
128 {
129         __local_bh_disable((unsigned long)__builtin_return_address(0),
130                                 SOFTIRQ_DISABLE_OFFSET);
131 }
132
133 EXPORT_SYMBOL(local_bh_disable);
134
/*
 * __local_bh_enable - lower preempt_count by @cnt without running softirqs
 * @cnt: amount subtracted from preempt_count
 *
 * Warns once if called from hardirq context or with interrupts enabled.
 */
static void __local_bh_enable(unsigned int cnt)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	/* Tell the tracer when this call is the one re-enabling softirqs. */
	if (softirq_count() == cnt) {
		unsigned long caller =
			(unsigned long)__builtin_return_address(0);

		trace_softirqs_on(caller);
	}

	sub_preempt_count(cnt);
}
144
145 /*
146  * Special-case - softirqs can safely be enabled in
147  * cond_resched_softirq(), or by __do_softirq(),
148  * without processing still-pending softirqs:
149  */
150 void _local_bh_enable(void)
151 {
152         __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
153 }
154
155 EXPORT_SYMBOL(_local_bh_enable);
156
157 static inline void _local_bh_enable_ip(unsigned long ip)
158 {
159         WARN_ON_ONCE(in_irq() || irqs_disabled());
160 #ifdef CONFIG_TRACE_IRQFLAGS
161         local_irq_disable();
162 #endif
163         /*
164          * Are softirqs going to be turned on now:
165          */
166         if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
167                 trace_softirqs_on(ip);
168         /*
169          * Keep preemption disabled until we are done with
170          * softirq processing:
171          */
172         sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
173
174         if (unlikely(!in_interrupt() && local_softirq_pending()))
175                 do_softirq();
176
177         dec_preempt_count();
178 #ifdef CONFIG_TRACE_IRQFLAGS
179         local_irq_enable();
180 #endif
181         preempt_check_resched();
182 }
183
/* Enable bottom halves, recording our caller as the trace location. */
void local_bh_enable(void)
{
	unsigned long caller = (unsigned long)__builtin_return_address(0);

	_local_bh_enable_ip(caller);
}
EXPORT_SYMBOL(local_bh_enable);
189
/*
 * local_bh_enable_ip - enable bottom halves on behalf of another location
 * @ip: address reported to the softirq tracer in place of our caller
 */
void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
195
196 /*
197  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
198  * and we fall back to softirqd after that.
199  *
200  * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
202  * we want to handle softirqs as soon as possible, but they
203  * should not be able to lock up the box.
204  */
205 #define MAX_SOFTIRQ_RESTART 10
206
207 asmlinkage void __do_softirq(void)
208 {
209         struct softirq_action *h;
210         __u32 pending;
211         int max_restart = MAX_SOFTIRQ_RESTART;
212         int cpu;
213
214         pending = local_softirq_pending();
215         account_system_vtime(current);
216
217         __local_bh_disable((unsigned long)__builtin_return_address(0),
218                                 SOFTIRQ_OFFSET);
219         lockdep_softirq_enter();
220
221         cpu = smp_processor_id();
222 restart:
223         /* Reset the pending bitmask before enabling irqs */
224         set_softirq_pending(0);
225
226         local_irq_enable();
227
228         h = softirq_vec;
229
230         do {
231                 if (pending & 1) {
232                         int prev_count = preempt_count();
233                         kstat_incr_softirqs_this_cpu(h - softirq_vec);
234
235                         trace_softirq_entry(h, softirq_vec);
236                         h->action(h);
237                         trace_softirq_exit(h, softirq_vec);
238                         if (unlikely(prev_count != preempt_count())) {
239                                 printk(KERN_ERR "huh, entered softirq %td %s %p"
240                                        "with preempt_count %08x,"
241                                        " exited with %08x?\n", h - softirq_vec,
242                                        softirq_to_name[h - softirq_vec],
243                                        h->action, prev_count, preempt_count());
244                                 preempt_count() = prev_count;
245                         }
246
247                         rcu_bh_qs(cpu);
248                 }
249                 h++;
250                 pending >>= 1;
251         } while (pending);
252
253         local_irq_disable();
254
255         pending = local_softirq_pending();
256         if (pending && --max_restart)
257                 goto restart;
258
259         if (pending)
260                 wakeup_softirqd();
261
262         lockdep_softirq_exit();
263
264         account_system_vtime(current);
265         __local_bh_enable(SOFTIRQ_OFFSET);
266 }
267
268 #ifndef __ARCH_HAS_DO_SOFTIRQ
269
270 asmlinkage void do_softirq(void)
271 {
272         __u32 pending;
273         unsigned long flags;
274
275         if (in_interrupt())
276                 return;
277
278         local_irq_save(flags);
279
280         pending = local_softirq_pending();
281
282         if (pending)
283                 __do_softirq();
284
285         local_irq_restore(flags);
286 }
287
288 #endif
289
/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	const int this_cpu = smp_processor_id();

	rcu_irq_enter();

	if (idle_cpu(this_cpu) && !in_interrupt()) {
		/*
		 * With bh disabled, raise_softirq does not wake ksoftirqd
		 * needlessly: the softirq is serviced on return from the
		 * interrupt anyway.
		 */
		local_bh_disable();
		tick_check_idle(this_cpu);
		_local_bh_enable();
	}

	__irq_enter();
}
310
311 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
312 # define invoke_softirq()       __do_softirq()
313 #else
314 # define invoke_softirq()       do_softirq()
315 #endif
316
317 /*
318  * Exit an interrupt context. Process softirqs if needed and possible:
319  */
320 void irq_exit(void)
321 {
322         account_system_vtime(current);
323         trace_hardirq_exit();
324         sub_preempt_count(IRQ_EXIT_OFFSET);
325         if (!in_interrupt() && local_softirq_pending())
326                 invoke_softirq();
327
328         rcu_irq_exit();
329 #ifdef CONFIG_NO_HZ
330         /* Make sure that timer wheel updates are propagated */
331         if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
332                 tick_nohz_stop_sched_tick(0);
333 #endif
334         preempt_enable_no_resched();
335 }
336
/*
 * raise_softirq_irqoff - mark softirq @nr pending on this CPU
 * @nr: softirq number
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * In an interrupt or softirq (which also covers softirq-disabled
	 * code) nothing more is needed: the softirq runs when we return
	 * from the irq or softirq.
	 */
	if (in_interrupt())
		return;

	/* Otherwise wake ksoftirqd so the softirq is scheduled soon. */
	wakeup_softirqd();
}
356
/*
 * raise_softirq - mark softirq @nr pending from any interrupt state
 * @nr: softirq number
 *
 * Wraps raise_softirq_irqoff() in an irq save/restore pair.
 */
void raise_softirq(unsigned int nr)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	raise_softirq_irqoff(nr);
	local_irq_restore(irqflags);
}
365
366 void open_softirq(int nr, void (*action)(struct softirq_action *))
367 {
368         softirq_vec[nr].action = action;
369 }
370
371 /*
372  * Tasklets
373  */
374 struct tasklet_head
375 {
376         struct tasklet_struct *head;
377         struct tasklet_struct **tail;
378 };
379
380 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
381 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
382
383 void __tasklet_schedule(struct tasklet_struct *t)
384 {
385         unsigned long flags;
386
387         local_irq_save(flags);
388         t->next = NULL;
389         *__get_cpu_var(tasklet_vec).tail = t;
390         __get_cpu_var(tasklet_vec).tail = &(t->next);
391         raise_softirq_irqoff(TASKLET_SOFTIRQ);
392         local_irq_restore(flags);
393 }
394
395 EXPORT_SYMBOL(__tasklet_schedule);
396
397 void __tasklet_hi_schedule(struct tasklet_struct *t)
398 {
399         unsigned long flags;
400
401         local_irq_save(flags);
402         t->next = NULL;
403         *__get_cpu_var(tasklet_hi_vec).tail = t;
404         __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
405         raise_softirq_irqoff(HI_SOFTIRQ);
406         local_irq_restore(flags);
407 }
408
409 EXPORT_SYMBOL(__tasklet_hi_schedule);
410
411 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
412 {
413         BUG_ON(!irqs_disabled());
414
415         t->next = __get_cpu_var(tasklet_hi_vec).head;
416         __get_cpu_var(tasklet_hi_vec).head = t;
417         __raise_softirq_irqoff(HI_SOFTIRQ);
418 }
419
420 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
421
422 static void tasklet_action(struct softirq_action *a)
423 {
424         struct tasklet_struct *list;
425
426         local_irq_disable();
427         list = __get_cpu_var(tasklet_vec).head;
428         __get_cpu_var(tasklet_vec).head = NULL;
429         __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
430         local_irq_enable();
431
432         while (list) {
433                 struct tasklet_struct *t = list;
434
435                 list = list->next;
436
437                 if (tasklet_trylock(t)) {
438                         if (!atomic_read(&t->count)) {
439                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
440                                         BUG();
441                                 t->func(t->data);
442                                 tasklet_unlock(t);
443                                 continue;
444                         }
445                         tasklet_unlock(t);
446                 }
447
448                 local_irq_disable();
449                 t->next = NULL;
450                 *__get_cpu_var(tasklet_vec).tail = t;
451                 __get_cpu_var(tasklet_vec).tail = &(t->next);
452                 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
453                 local_irq_enable();
454         }
455 }
456
457 static void tasklet_hi_action(struct softirq_action *a)
458 {
459         struct tasklet_struct *list;
460
461         local_irq_disable();
462         list = __get_cpu_var(tasklet_hi_vec).head;
463         __get_cpu_var(tasklet_hi_vec).head = NULL;
464         __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
465         local_irq_enable();
466
467         while (list) {
468                 struct tasklet_struct *t = list;
469
470                 list = list->next;
471
472                 if (tasklet_trylock(t)) {
473                         if (!atomic_read(&t->count)) {
474                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
475                                         BUG();
476                                 t->func(t->data);
477                                 tasklet_unlock(t);
478                                 continue;
479                         }
480                         tasklet_unlock(t);
481                 }
482
483                 local_irq_disable();
484                 t->next = NULL;
485                 *__get_cpu_var(tasklet_hi_vec).tail = t;
486                 __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
487                 __raise_softirq_irqoff(HI_SOFTIRQ);
488                 local_irq_enable();
489         }
490 }
491
492
493 void tasklet_init(struct tasklet_struct *t,
494                   void (*func)(unsigned long), unsigned long data)
495 {
496         t->next = NULL;
497         t->state = 0;
498         atomic_set(&t->count, 0);
499         t->func = func;
500         t->data = data;
501 }
502
503 EXPORT_SYMBOL(tasklet_init);
504
505 void tasklet_kill(struct tasklet_struct *t)
506 {
507         if (in_interrupt())
508                 printk("Attempt to kill tasklet from interrupt\n");
509
510         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
511                 do {
512                         yield();
513                 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
514         }
515         tasklet_unlock_wait(t);
516         clear_bit(TASKLET_STATE_SCHED, &t->state);
517 }
518
519 EXPORT_SYMBOL(tasklet_kill);
520
521 /*
522  * tasklet_hrtimer
523  */
524
525 /*
526  * The trampoline is called when the hrtimer expires. It schedules a tasklet
527  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
528  * hrtimer callback, but from softirq context.
529  */
530 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
531 {
532         struct tasklet_hrtimer *ttimer =
533                 container_of(timer, struct tasklet_hrtimer, timer);
534
535         tasklet_hi_schedule(&ttimer->tasklet);
536         return HRTIMER_NORESTART;
537 }
538
539 /*
540  * Helper function which calls the hrtimer callback from
541  * tasklet/softirq context
542  */
543 static void __tasklet_hrtimer_trampoline(unsigned long data)
544 {
545         struct tasklet_hrtimer *ttimer = (void *)data;
546         enum hrtimer_restart restart;
547
548         restart = ttimer->function(&ttimer->timer);
549         if (restart != HRTIMER_NORESTART)
550                 hrtimer_restart(&ttimer->timer);
551 }
552
553 /**
554  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
555  * @ttimer:      tasklet_hrtimer which is initialized
556  * @function:    hrtimer callback funtion which gets called from softirq context
557  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
558  * @mode:        hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
559  */
560 void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
561                           enum hrtimer_restart (*function)(struct hrtimer *),
562                           clockid_t which_clock, enum hrtimer_mode mode)
563 {
564         hrtimer_init(&ttimer->timer, which_clock, mode);
565         ttimer->timer.function = __hrtimer_tasklet_trampoline;
566         tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
567                      (unsigned long)ttimer);
568         ttimer->function = function;
569 }
570 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
571
572 /*
573  * Remote softirq bits
574  */
575
576 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
577 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
578
579 static void __local_trigger(struct call_single_data *cp, int softirq)
580 {
581         struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
582
583         list_add_tail(&cp->list, head);
584
585         /* Trigger the softirq only if the list was previously empty.  */
586         if (head->next == &cp->list)
587                 raise_softirq_irqoff(softirq);
588 }
589
#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
/*
 * Runs on the target CPU: queue the work locally, for the softirq number
 * that the sender stored in ->priv.
 */
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	int softirq = cp->priv;
	unsigned long irqflags;

	local_irq_save(irqflags);
	__local_trigger(cp, softirq);
	local_irq_restore(irqflags);
}

/*
 * Try to hand @cp to @cpu via an SMP function call.
 * Returns 0 when the call was issued, 1 when @cpu is not online.
 */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (!cpu_online(cpu))
		return 1;

	cp->func = remote_softirq_receive;
	cp->info = cp;
	cp->flags = 0;
	cp->priv = softirq;

	__smp_call_function_single(cpu, cp, 0);
	return 0;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
/* Without the generic SMP helpers remote delivery always fails (returns 1). */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
#endif
623
/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	/* Done if the work went to a different cpu successfully. */
	if (cpu != this_cpu && !__try_remote_softirq(cp, cpu, softirq))
		return;

	__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);
642
/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	__send_remote_softirq(cp, cpu, smp_processor_id(), softirq);
	local_irq_restore(irqflags);
}
EXPORT_SYMBOL(send_remote_softirq);
663
664 static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
665                                                unsigned long action, void *hcpu)
666 {
667         /*
668          * If a CPU goes away, splice its entries to the current CPU
669          * and trigger a run of the softirq
670          */
671         if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
672                 int cpu = (unsigned long) hcpu;
673                 int i;
674
675                 local_irq_disable();
676                 for (i = 0; i < NR_SOFTIRQS; i++) {
677                         struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
678                         struct list_head *local_head;
679
680                         if (list_empty(head))
681                                 continue;
682
683                         local_head = &__get_cpu_var(softirq_work_list[i]);
684                         list_splice_init(head, local_head);
685                         raise_softirq_irqoff(i);
686                 }
687                 local_irq_enable();
688         }
689
690         return NOTIFY_OK;
691 }
692
693 static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
694         .notifier_call  = remote_softirq_cpu_notify,
695 };
696
697 void __init softirq_init(void)
698 {
699         int cpu;
700
701         for_each_possible_cpu(cpu) {
702                 int i;
703
704                 per_cpu(tasklet_vec, cpu).tail =
705                         &per_cpu(tasklet_vec, cpu).head;
706                 per_cpu(tasklet_hi_vec, cpu).tail =
707                         &per_cpu(tasklet_hi_vec, cpu).head;
708                 for (i = 0; i < NR_SOFTIRQS; i++)
709                         INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
710         }
711
712         register_hotcpu_notifier(&remote_softirq_cpu_notifier);
713
714         open_softirq(TASKLET_SOFTIRQ, tasklet_action);
715         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
716 }
717
718 static int run_ksoftirqd(void * __bind_cpu)
719 {
720         set_current_state(TASK_INTERRUPTIBLE);
721
722         current->flags |= PF_KSOFTIRQD;
723         while (!kthread_should_stop()) {
724                 preempt_disable();
725                 if (!local_softirq_pending()) {
726                         preempt_enable_no_resched();
727                         schedule();
728                         preempt_disable();
729                 }
730
731                 __set_current_state(TASK_RUNNING);
732
733                 while (local_softirq_pending()) {
734                         /* Preempt disable stops cpu going offline.
735                            If already offline, we'll be on wrong CPU:
736                            don't process */
737                         if (cpu_is_offline((long)__bind_cpu))
738                                 goto wait_to_die;
739                         do_softirq();
740                         preempt_enable_no_resched();
741                         cond_resched();
742                         preempt_disable();
743                         rcu_note_context_switch((long)__bind_cpu);
744                 }
745                 preempt_enable();
746                 set_current_state(TASK_INTERRUPTIBLE);
747         }
748         __set_current_state(TASK_RUNNING);
749         return 0;
750
751 wait_to_die:
752         preempt_enable();
753         /* Wait for kthread_stop */
754         set_current_state(TASK_INTERRUPTIBLE);
755         while (!kthread_should_stop()) {
756                 schedule();
757                 set_current_state(TASK_INTERRUPTIBLE);
758         }
759         __set_current_state(TASK_RUNNING);
760         return 0;
761 }
762
763 #ifdef CONFIG_HOTPLUG_CPU
764 /*
765  * tasklet_kill_immediate is called to remove a tasklet which can already be
766  * scheduled for execution on @cpu.
767  *
768  * Unlike tasklet_kill, this function removes the tasklet
769  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
770  *
771  * When this function is called, @cpu must be in the CPU_DEAD state.
772  */
773 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
774 {
775         struct tasklet_struct **i;
776
777         BUG_ON(cpu_online(cpu));
778         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
779
780         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
781                 return;
782
783         /* CPU is dead, so no lock needed. */
784         for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
785                 if (*i == t) {
786                         *i = t->next;
787                         /* If this was the tail element, move the tail ptr */
788                         if (*i == NULL)
789                                 per_cpu(tasklet_vec, cpu).tail = i;
790                         return;
791                 }
792         }
793         BUG();
794 }
795
796 static void takeover_tasklets(unsigned int cpu)
797 {
798         /* CPU is dead, so no lock needed. */
799         local_irq_disable();
800
801         /* Find end, append list for that CPU. */
802         if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
803                 *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
804                 __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
805                 per_cpu(tasklet_vec, cpu).head = NULL;
806                 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
807         }
808         raise_softirq_irqoff(TASKLET_SOFTIRQ);
809
810         if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
811                 *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
812                 __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
813                 per_cpu(tasklet_hi_vec, cpu).head = NULL;
814                 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
815         }
816         raise_softirq_irqoff(HI_SOFTIRQ);
817
818         local_irq_enable();
819 }
820 #endif /* CONFIG_HOTPLUG_CPU */
821
822 static int __cpuinit cpu_callback(struct notifier_block *nfb,
823                                   unsigned long action,
824                                   void *hcpu)
825 {
826         int hotcpu = (unsigned long)hcpu;
827         struct task_struct *p;
828
829         switch (action) {
830         case CPU_UP_PREPARE:
831         case CPU_UP_PREPARE_FROZEN:
832                 p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
833                 if (IS_ERR(p)) {
834                         printk("ksoftirqd for %i failed\n", hotcpu);
835                         return notifier_from_errno(PTR_ERR(p));
836                 }
837                 kthread_bind(p, hotcpu);
838                 per_cpu(ksoftirqd, hotcpu) = p;
839                 break;
840         case CPU_ONLINE:
841         case CPU_ONLINE_FROZEN:
842                 wake_up_process(per_cpu(ksoftirqd, hotcpu));
843                 break;
844 #ifdef CONFIG_HOTPLUG_CPU
845         case CPU_UP_CANCELED:
846         case CPU_UP_CANCELED_FROZEN:
847                 if (!per_cpu(ksoftirqd, hotcpu))
848                         break;
849                 /* Unbind so it can run.  Fall thru. */
850                 kthread_bind(per_cpu(ksoftirqd, hotcpu),
851                              cpumask_any(cpu_online_mask));
852         case CPU_DEAD:
853         case CPU_DEAD_FROZEN: {
854                 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
855
856                 p = per_cpu(ksoftirqd, hotcpu);
857                 per_cpu(ksoftirqd, hotcpu) = NULL;
858                 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
859                 kthread_stop(p);
860                 takeover_tasklets(hotcpu);
861                 break;
862         }
863 #endif /* CONFIG_HOTPLUG_CPU */
864         }
865         return NOTIFY_OK;
866 }
867
868 static struct notifier_block __cpuinitdata cpu_nfb = {
869         .notifier_call = cpu_callback
870 };
871
872 static __init int spawn_ksoftirqd(void)
873 {
874         void *cpu = (void *)(long)smp_processor_id();
875         int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
876
877         BUG_ON(err != NOTIFY_OK);
878         cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
879         register_cpu_notifier(&cpu_nfb);
880         return 0;
881 }
882 early_initcall(spawn_ksoftirqd);
883
#ifdef CONFIG_SMP
/*
 * on_each_cpu - call a function on all processors
 * @func: function to call
 * @info: argument passed to @func
 * @wait: forwarded to smp_call_function()
 *
 * @func is first dispatched through smp_call_function() and then run on
 * the local CPU with interrupts disabled.  The interrupt state found on
 * entry is restored afterwards rather than being unconditionally enabled,
 * so the caller's IRQ state is never changed behind its back.
 *
 * Returns the result of smp_call_function().
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	unsigned long flags;
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_save(flags);
	func(info);
	local_irq_restore(flags);
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
902
903 /*
904  * [ These __weak aliases are kept in a separate compilation unit, so that
905  *   GCC does not inline them incorrectly. ]
906  */
907
908 int __init __weak early_irq_init(void)
909 {
910         return 0;
911 }
912
913 #ifdef CONFIG_GENERIC_HARDIRQS
914 int __init __weak arch_probe_nr_irqs(void)
915 {
916         return NR_IRQS_LEGACY;
917 }
918
919 int __init __weak arch_early_irq_init(void)
920 {
921         return 0;
922 }
923 #endif