NOHZ: unify the nohz function calls in irq_enter()
[linux-2.6.git] / kernel / softirq.c
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  *      Distribute under GPLv2.
7  *
8  *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  */
10
11 #include <linux/module.h>
12 #include <linux/kernel_stat.h>
13 #include <linux/interrupt.h>
14 #include <linux/init.h>
15 #include <linux/mm.h>
16 #include <linux/notifier.h>
17 #include <linux/percpu.h>
18 #include <linux/cpu.h>
19 #include <linux/freezer.h>
20 #include <linux/kthread.h>
21 #include <linux/rcupdate.h>
22 #include <linux/smp.h>
23 #include <linux/tick.h>
24
25 #include <asm/irq.h>
26 /*
27    - No shared variables, all the data are CPU local.
28    - If a softirq needs serialization, let it serialize itself
29      by its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution. Hence, we get a sort of weak CPU binding, though it is
     still not clear whether this results in better locality.
34
35    Examples:
36    - NET RX softirq. It is multithreaded and does not require
37      any global serialization.
38    - NET TX softirq. It kicks software netdevice queues, hence
39      it is logically serialized per device, but this serialization
40      is invisible to common code.
41    - Tasklets: serialized wrt itself.
42  */
43
44 #ifndef __ARCH_IRQ_STAT
45 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
46 EXPORT_SYMBOL(irq_stat);
47 #endif
48
49 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
50
51 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
52
53 /*
54  * we cannot loop indefinitely here to avoid userspace starvation,
55  * but we also don't want to introduce a worst case 1/HZ latency
56  * to the pending events, so lets the scheduler to balance
57  * the softirq load for us.
58  */
59 static inline void wakeup_softirqd(void)
60 {
61         /* Interrupts are disabled: no need to stop preemption */
62         struct task_struct *tsk = __get_cpu_var(ksoftirqd);
63
64         if (tsk && tsk->state != TASK_RUNNING)
65                 wake_up_process(tsk);
66 }
67
68 /*
69  * This one is for softirq.c-internal use,
70  * where hardirqs are disabled legitimately:
71  */
72 #ifdef CONFIG_TRACE_IRQFLAGS
73 static void __local_bh_disable(unsigned long ip)
74 {
75         unsigned long flags;
76
77         WARN_ON_ONCE(in_irq());
78
79         raw_local_irq_save(flags);
80         add_preempt_count(SOFTIRQ_OFFSET);
81         /*
82          * Were softirqs turned off above:
83          */
84         if (softirq_count() == SOFTIRQ_OFFSET)
85                 trace_softirqs_off(ip);
86         raw_local_irq_restore(flags);
87 }
88 #else /* !CONFIG_TRACE_IRQFLAGS */
89 static inline void __local_bh_disable(unsigned long ip)
90 {
91         add_preempt_count(SOFTIRQ_OFFSET);
92         barrier();
93 }
94 #endif /* CONFIG_TRACE_IRQFLAGS */
95
/*
 * Disable softirq processing on the local CPU, recording our caller's
 * address for the irq-flags tracer.
 */
void local_bh_disable(void)
{
        unsigned long caller = (unsigned long)__builtin_return_address(0);

        __local_bh_disable(caller);
}

EXPORT_SYMBOL(local_bh_disable);
102
103 void __local_bh_enable(void)
104 {
105         WARN_ON_ONCE(in_irq());
106
107         /*
108          * softirqs should never be enabled by __local_bh_enable(),
109          * it always nests inside local_bh_enable() sections:
110          */
111         WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
112
113         sub_preempt_count(SOFTIRQ_OFFSET);
114 }
115 EXPORT_SYMBOL_GPL(__local_bh_enable);
116
117 /*
118  * Special-case - softirqs can safely be enabled in
119  * cond_resched_softirq(), or by __do_softirq(),
120  * without processing still-pending softirqs:
121  */
122 void _local_bh_enable(void)
123 {
124         WARN_ON_ONCE(in_irq());
125         WARN_ON_ONCE(!irqs_disabled());
126
127         if (softirq_count() == SOFTIRQ_OFFSET)
128                 trace_softirqs_on((unsigned long)__builtin_return_address(0));
129         sub_preempt_count(SOFTIRQ_OFFSET);
130 }
131
132 EXPORT_SYMBOL(_local_bh_enable);
133
134 static inline void _local_bh_enable_ip(unsigned long ip)
135 {
136         WARN_ON_ONCE(in_irq() || irqs_disabled());
137 #ifdef CONFIG_TRACE_IRQFLAGS
138         local_irq_disable();
139 #endif
140         /*
141          * Are softirqs going to be turned on now:
142          */
143         if (softirq_count() == SOFTIRQ_OFFSET)
144                 trace_softirqs_on(ip);
145         /*
146          * Keep preemption disabled until we are done with
147          * softirq processing:
148          */
149         sub_preempt_count(SOFTIRQ_OFFSET - 1);
150
151         if (unlikely(!in_interrupt() && local_softirq_pending()))
152                 do_softirq();
153
154         dec_preempt_count();
155 #ifdef CONFIG_TRACE_IRQFLAGS
156         local_irq_enable();
157 #endif
158         preempt_check_resched();
159 }
160
/*
 * Re-enable softirqs on the local CPU and run pending ones; our
 * caller's address is recorded for the irq-flags tracer.
 */
void local_bh_enable(void)
{
        unsigned long caller = (unsigned long)__builtin_return_address(0);

        _local_bh_enable_ip(caller);
}
EXPORT_SYMBOL(local_bh_enable);
166
/*
 * Same as local_bh_enable(), except that the address reported to the
 * irq-flags tracer is supplied by the caller in @ip.
 */
void local_bh_enable_ip(unsigned long ip)
{
        _local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
172
173 /*
174  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
175  * and we fall back to softirqd after that.
176  *
177  * This number has been established via experimentation.
178  * The two things to balance is latency against fairness -
179  * we want to handle softirqs as soon as possible, but they
180  * should not be able to lock up the box.
181  */
182 #define MAX_SOFTIRQ_RESTART 10
183
184 asmlinkage void __do_softirq(void)
185 {
186         struct softirq_action *h;
187         __u32 pending;
188         int max_restart = MAX_SOFTIRQ_RESTART;
189         int cpu;
190
191         pending = local_softirq_pending();
192         account_system_vtime(current);
193
194         __local_bh_disable((unsigned long)__builtin_return_address(0));
195         trace_softirq_enter();
196
197         cpu = smp_processor_id();
198 restart:
199         /* Reset the pending bitmask before enabling irqs */
200         set_softirq_pending(0);
201
202         local_irq_enable();
203
204         h = softirq_vec;
205
206         do {
207                 if (pending & 1) {
208                         int prev_count = preempt_count();
209
210                         h->action(h);
211
212                         if (unlikely(prev_count != preempt_count())) {
213                                 printk(KERN_ERR "huh, entered softirq %td %p"
214                                        "with preempt_count %08x,"
215                                        " exited with %08x?\n", h - softirq_vec,
216                                        h->action, prev_count, preempt_count());
217                                 preempt_count() = prev_count;
218                         }
219
220                         rcu_bh_qsctr_inc(cpu);
221                 }
222                 h++;
223                 pending >>= 1;
224         } while (pending);
225
226         local_irq_disable();
227
228         pending = local_softirq_pending();
229         if (pending && --max_restart)
230                 goto restart;
231
232         if (pending)
233                 wakeup_softirqd();
234
235         trace_softirq_exit();
236
237         account_system_vtime(current);
238         _local_bh_enable();
239 }
240
241 #ifndef __ARCH_HAS_DO_SOFTIRQ
242
243 asmlinkage void do_softirq(void)
244 {
245         __u32 pending;
246         unsigned long flags;
247
248         if (in_interrupt())
249                 return;
250
251         local_irq_save(flags);
252
253         pending = local_softirq_pending();
254
255         if (pending)
256                 __do_softirq();
257
258         local_irq_restore(flags);
259 }
260
261 #endif
262
/*
 * Enter an interrupt context.
 *
 * If this CPU is idle and we are not already nested inside an
 * interrupt, tick_check_idle() is called first.
 */
void irq_enter(void)
{
        int this_cpu = smp_processor_id();

        if (idle_cpu(this_cpu)) {
                if (!in_interrupt())
                        tick_check_idle(this_cpu);
        }

        __irq_enter();
}
275
/*
 * How irq_exit() runs softirqs: when __ARCH_IRQ_EXIT_IRQS_DISABLED is
 * defined it calls __do_softirq() directly, otherwise it goes through
 * do_softirq().
 */
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()       do_softirq()
#else
# define invoke_softirq()       __do_softirq()
#endif
281
282 /*
283  * Exit an interrupt context. Process softirqs if needed and possible:
284  */
285 void irq_exit(void)
286 {
287         account_system_vtime(current);
288         trace_hardirq_exit();
289         sub_preempt_count(IRQ_EXIT_OFFSET);
290         if (!in_interrupt() && local_softirq_pending())
291                 invoke_softirq();
292
293 #ifdef CONFIG_NO_HZ
294         /* Make sure that timer wheel updates are propagated */
295         if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
296                 tick_nohz_stop_sched_tick(0);
297         rcu_irq_exit();
298 #endif
299         preempt_enable_no_resched();
300 }
301
/*
 * Mark softirq @nr pending on this CPU.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * In an interrupt or softirq (which also covers code running
         * with softirqs disabled) there is nothing more to do: the
         * softirq will run when we return from the irq or softirq.
         */
        if (in_interrupt())
                return;

        /* Otherwise wake ksoftirqd so that the softirq is run soon. */
        wakeup_softirqd();
}
321
/*
 * Mark softirq @nr pending on this CPU.  Interrupts are disabled
 * around the operation and their previous state is restored afterwards.
 */
void raise_softirq(unsigned int nr)
{
        unsigned long irqflags;

        local_irq_save(irqflags);
        raise_softirq_irqoff(nr);
        local_irq_restore(irqflags);
}
330
331 void open_softirq(int nr, void (*action)(struct softirq_action *))
332 {
333         softirq_vec[nr].action = action;
334 }
335
336 /* Tasklets */
337 struct tasklet_head
338 {
339         struct tasklet_struct *head;
340         struct tasklet_struct **tail;
341 };
342
343 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
344 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
345
346 void __tasklet_schedule(struct tasklet_struct *t)
347 {
348         unsigned long flags;
349
350         local_irq_save(flags);
351         t->next = NULL;
352         *__get_cpu_var(tasklet_vec).tail = t;
353         __get_cpu_var(tasklet_vec).tail = &(t->next);
354         raise_softirq_irqoff(TASKLET_SOFTIRQ);
355         local_irq_restore(flags);
356 }
357
358 EXPORT_SYMBOL(__tasklet_schedule);
359
360 void __tasklet_hi_schedule(struct tasklet_struct *t)
361 {
362         unsigned long flags;
363
364         local_irq_save(flags);
365         t->next = NULL;
366         *__get_cpu_var(tasklet_hi_vec).tail = t;
367         __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
368         raise_softirq_irqoff(HI_SOFTIRQ);
369         local_irq_restore(flags);
370 }
371
372 EXPORT_SYMBOL(__tasklet_hi_schedule);
373
374 static void tasklet_action(struct softirq_action *a)
375 {
376         struct tasklet_struct *list;
377
378         local_irq_disable();
379         list = __get_cpu_var(tasklet_vec).head;
380         __get_cpu_var(tasklet_vec).head = NULL;
381         __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
382         local_irq_enable();
383
384         while (list) {
385                 struct tasklet_struct *t = list;
386
387                 list = list->next;
388
389                 if (tasklet_trylock(t)) {
390                         if (!atomic_read(&t->count)) {
391                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
392                                         BUG();
393                                 t->func(t->data);
394                                 tasklet_unlock(t);
395                                 continue;
396                         }
397                         tasklet_unlock(t);
398                 }
399
400                 local_irq_disable();
401                 t->next = NULL;
402                 *__get_cpu_var(tasklet_vec).tail = t;
403                 __get_cpu_var(tasklet_vec).tail = &(t->next);
404                 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
405                 local_irq_enable();
406         }
407 }
408
409 static void tasklet_hi_action(struct softirq_action *a)
410 {
411         struct tasklet_struct *list;
412
413         local_irq_disable();
414         list = __get_cpu_var(tasklet_hi_vec).head;
415         __get_cpu_var(tasklet_hi_vec).head = NULL;
416         __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
417         local_irq_enable();
418
419         while (list) {
420                 struct tasklet_struct *t = list;
421
422                 list = list->next;
423
424                 if (tasklet_trylock(t)) {
425                         if (!atomic_read(&t->count)) {
426                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
427                                         BUG();
428                                 t->func(t->data);
429                                 tasklet_unlock(t);
430                                 continue;
431                         }
432                         tasklet_unlock(t);
433                 }
434
435                 local_irq_disable();
436                 t->next = NULL;
437                 *__get_cpu_var(tasklet_hi_vec).tail = t;
438                 __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
439                 __raise_softirq_irqoff(HI_SOFTIRQ);
440                 local_irq_enable();
441         }
442 }
443
444
445 void tasklet_init(struct tasklet_struct *t,
446                   void (*func)(unsigned long), unsigned long data)
447 {
448         t->next = NULL;
449         t->state = 0;
450         atomic_set(&t->count, 0);
451         t->func = func;
452         t->data = data;
453 }
454
455 EXPORT_SYMBOL(tasklet_init);
456
457 void tasklet_kill(struct tasklet_struct *t)
458 {
459         if (in_interrupt())
460                 printk("Attempt to kill tasklet from interrupt\n");
461
462         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
463                 do
464                         yield();
465                 while (test_bit(TASKLET_STATE_SCHED, &t->state));
466         }
467         tasklet_unlock_wait(t);
468         clear_bit(TASKLET_STATE_SCHED, &t->state);
469 }
470
471 EXPORT_SYMBOL(tasklet_kill);
472
473 void __init softirq_init(void)
474 {
475         int cpu;
476
477         for_each_possible_cpu(cpu) {
478                 per_cpu(tasklet_vec, cpu).tail =
479                         &per_cpu(tasklet_vec, cpu).head;
480                 per_cpu(tasklet_hi_vec, cpu).tail =
481                         &per_cpu(tasklet_hi_vec, cpu).head;
482         }
483
484         open_softirq(TASKLET_SOFTIRQ, tasklet_action);
485         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
486 }
487
488 static int ksoftirqd(void * __bind_cpu)
489 {
490         set_current_state(TASK_INTERRUPTIBLE);
491
492         while (!kthread_should_stop()) {
493                 preempt_disable();
494                 if (!local_softirq_pending()) {
495                         preempt_enable_no_resched();
496                         schedule();
497                         preempt_disable();
498                 }
499
500                 __set_current_state(TASK_RUNNING);
501
502                 while (local_softirq_pending()) {
503                         /* Preempt disable stops cpu going offline.
504                            If already offline, we'll be on wrong CPU:
505                            don't process */
506                         if (cpu_is_offline((long)__bind_cpu))
507                                 goto wait_to_die;
508                         do_softirq();
509                         preempt_enable_no_resched();
510                         cond_resched();
511                         preempt_disable();
512                 }
513                 preempt_enable();
514                 set_current_state(TASK_INTERRUPTIBLE);
515         }
516         __set_current_state(TASK_RUNNING);
517         return 0;
518
519 wait_to_die:
520         preempt_enable();
521         /* Wait for kthread_stop */
522         set_current_state(TASK_INTERRUPTIBLE);
523         while (!kthread_should_stop()) {
524                 schedule();
525                 set_current_state(TASK_INTERRUPTIBLE);
526         }
527         __set_current_state(TASK_RUNNING);
528         return 0;
529 }
530
531 #ifdef CONFIG_HOTPLUG_CPU
532 /*
533  * tasklet_kill_immediate is called to remove a tasklet which can already be
534  * scheduled for execution on @cpu.
535  *
536  * Unlike tasklet_kill, this function removes the tasklet
537  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
538  *
539  * When this function is called, @cpu must be in the CPU_DEAD state.
540  */
541 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
542 {
543         struct tasklet_struct **i;
544
545         BUG_ON(cpu_online(cpu));
546         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
547
548         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
549                 return;
550
551         /* CPU is dead, so no lock needed. */
552         for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
553                 if (*i == t) {
554                         *i = t->next;
555                         /* If this was the tail element, move the tail ptr */
556                         if (*i == NULL)
557                                 per_cpu(tasklet_vec, cpu).tail = i;
558                         return;
559                 }
560         }
561         BUG();
562 }
563
564 static void takeover_tasklets(unsigned int cpu)
565 {
566         /* CPU is dead, so no lock needed. */
567         local_irq_disable();
568
569         /* Find end, append list for that CPU. */
570         if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
571                 *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
572                 __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
573                 per_cpu(tasklet_vec, cpu).head = NULL;
574                 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
575         }
576         raise_softirq_irqoff(TASKLET_SOFTIRQ);
577
578         if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
579                 *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
580                 __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
581                 per_cpu(tasklet_hi_vec, cpu).head = NULL;
582                 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
583         }
584         raise_softirq_irqoff(HI_SOFTIRQ);
585
586         local_irq_enable();
587 }
588 #endif /* CONFIG_HOTPLUG_CPU */
589
590 static int __cpuinit cpu_callback(struct notifier_block *nfb,
591                                   unsigned long action,
592                                   void *hcpu)
593 {
594         int hotcpu = (unsigned long)hcpu;
595         struct task_struct *p;
596
597         switch (action) {
598         case CPU_UP_PREPARE:
599         case CPU_UP_PREPARE_FROZEN:
600                 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
601                 if (IS_ERR(p)) {
602                         printk("ksoftirqd for %i failed\n", hotcpu);
603                         return NOTIFY_BAD;
604                 }
605                 kthread_bind(p, hotcpu);
606                 per_cpu(ksoftirqd, hotcpu) = p;
607                 break;
608         case CPU_ONLINE:
609         case CPU_ONLINE_FROZEN:
610                 wake_up_process(per_cpu(ksoftirqd, hotcpu));
611                 break;
612 #ifdef CONFIG_HOTPLUG_CPU
613         case CPU_UP_CANCELED:
614         case CPU_UP_CANCELED_FROZEN:
615                 if (!per_cpu(ksoftirqd, hotcpu))
616                         break;
617                 /* Unbind so it can run.  Fall thru. */
618                 kthread_bind(per_cpu(ksoftirqd, hotcpu),
619                              any_online_cpu(cpu_online_map));
620         case CPU_DEAD:
621         case CPU_DEAD_FROZEN: {
622                 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
623
624                 p = per_cpu(ksoftirqd, hotcpu);
625                 per_cpu(ksoftirqd, hotcpu) = NULL;
626                 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
627                 kthread_stop(p);
628                 takeover_tasklets(hotcpu);
629                 break;
630         }
631 #endif /* CONFIG_HOTPLUG_CPU */
632         }
633         return NOTIFY_OK;
634 }
635
636 static struct notifier_block __cpuinitdata cpu_nfb = {
637         .notifier_call = cpu_callback
638 };
639
640 static __init int spawn_ksoftirqd(void)
641 {
642         void *cpu = (void *)(long)smp_processor_id();
643         int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
644
645         BUG_ON(err == NOTIFY_BAD);
646         cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
647         register_cpu_notifier(&cpu_nfb);
648         return 0;
649 }
650 early_initcall(spawn_ksoftirqd);
651
652 #ifdef CONFIG_SMP
/*
 * Call a function on all processors.
 *
 * @func: function to call
 * @info: argument passed to @func
 * @wait: passed through to smp_call_function()
 *
 * @func is run through smp_call_function() and then on the local CPU
 * with interrupts disabled.  The caller's interrupt state is saved and
 * restored around the local call rather than being unconditionally
 * re-enabled.
 *
 * Returns the result of smp_call_function().
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
        unsigned long flags;
        int ret;

        preempt_disable();
        ret = smp_call_function(func, info, wait);
        local_irq_save(flags);
        func(info);
        local_irq_restore(flags);
        preempt_enable();
        return ret;
}
EXPORT_SYMBOL(on_each_cpu);
669 #endif