Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6.git] / kernel / softirq.c
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  *      Distribute under GPLv2.
7  *
8  *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  */
10
11 #include <linux/module.h>
12 #include <linux/kernel_stat.h>
13 #include <linux/interrupt.h>
14 #include <linux/init.h>
15 #include <linux/mm.h>
16 #include <linux/notifier.h>
17 #include <linux/percpu.h>
18 #include <linux/cpu.h>
19 #include <linux/freezer.h>
20 #include <linux/kthread.h>
21 #include <linux/rcupdate.h>
22 #include <linux/smp.h>
23 #include <linux/tick.h>
24
25 #include <asm/irq.h>
26 /*
27    - No shared variables, all the data are CPU local.
28    - If a softirq needs serialization, let it serialize itself
29      by its own spinlocks.
30    - Even if softirq is serialized, only local cpu is marked for
31      execution. Hence, we get something sort of weak cpu binding.
32      Though it is still not clear, will it result in better locality
33      or will not.
34
35    Examples:
36    - NET RX softirq. It is multithreaded and does not require
37      any global serialization.
38    - NET TX softirq. It kicks software netdevice queues, hence
39      it is logically serialized per device, but this serialization
40      is invisible to common code.
41    - Tasklets: serialized wrt itself.
42  */
43
/*
 * Interrupt statistics array with NR_CPUS entries. It is only defined
 * (and exported) here when __ARCH_IRQ_STAT is not defined.
 */
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

/* Softirq handler table: filled in by open_softirq(), walked by __do_softirq(). */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

/* Per-CPU ksoftirqd task: set by cpu_callback(), woken by wakeup_softirqd(). */
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
52
53 /*
54  * we cannot loop indefinitely here to avoid userspace starvation,
55  * but we also don't want to introduce a worst case 1/HZ latency
56  * to the pending events, so lets the scheduler to balance
57  * the softirq load for us.
58  */
59 static inline void wakeup_softirqd(void)
60 {
61         /* Interrupts are disabled: no need to stop preemption */
62         struct task_struct *tsk = __get_cpu_var(ksoftirqd);
63
64         if (tsk && tsk->state != TASK_RUNNING)
65                 wake_up_process(tsk);
66 }
67
68 /*
69  * This one is for softirq.c-internal use,
70  * where hardirqs are disabled legitimately:
71  */
72 #ifdef CONFIG_TRACE_IRQFLAGS
73 static void __local_bh_disable(unsigned long ip)
74 {
75         unsigned long flags;
76
77         WARN_ON_ONCE(in_irq());
78
79         raw_local_irq_save(flags);
80         add_preempt_count(SOFTIRQ_OFFSET);
81         /*
82          * Were softirqs turned off above:
83          */
84         if (softirq_count() == SOFTIRQ_OFFSET)
85                 trace_softirqs_off(ip);
86         raw_local_irq_restore(flags);
87 }
88 #else /* !CONFIG_TRACE_IRQFLAGS */
89 static inline void __local_bh_disable(unsigned long ip)
90 {
91         add_preempt_count(SOFTIRQ_OFFSET);
92         barrier();
93 }
94 #endif /* CONFIG_TRACE_IRQFLAGS */
95
/*
 * Public entry point: raise the softirq-disable count, passing our
 * caller's return address on for tracing.
 */
void local_bh_disable(void)
{
        unsigned long caller = (unsigned long)__builtin_return_address(0);

        __local_bh_disable(caller);
}
EXPORT_SYMBOL(local_bh_disable);
102
/*
 * Subtract one SOFTIRQ_OFFSET from the preempt count. Pending softirqs
 * are not run from here. Warns (once) when called from hardirq context.
 */
void __local_bh_enable(void)
{
        WARN_ON_ONCE(in_irq());

        /*
         * softirqs should never be enabled by __local_bh_enable(),
         * it always nests inside local_bh_enable() sections:
         */
        WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);

        sub_preempt_count(SOFTIRQ_OFFSET);
}
EXPORT_SYMBOL_GPL(__local_bh_enable);
116
117 /*
118  * Special-case - softirqs can safely be enabled in
119  * cond_resched_softirq(), or by __do_softirq(),
120  * without processing still-pending softirqs:
121  */
122 void _local_bh_enable(void)
123 {
124         WARN_ON_ONCE(in_irq());
125         WARN_ON_ONCE(!irqs_disabled());
126
127         if (softirq_count() == SOFTIRQ_OFFSET)
128                 trace_softirqs_on((unsigned long)__builtin_return_address(0));
129         sub_preempt_count(SOFTIRQ_OFFSET);
130 }
131
132 EXPORT_SYMBOL(_local_bh_enable);
133
/*
 * Common body of local_bh_enable() and local_bh_enable_ip().
 *
 * Drops one softirq-disable level and, when that leaves us outside of
 * interrupt context with softirqs pending, runs them via do_softirq().
 * @ip is the address handed to trace_softirqs_on().
 *
 * Warns (once) if called from hardirq context or with irqs disabled.
 */
static inline void _local_bh_enable_ip(unsigned long ip)
{
        WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_disable();
#endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on(ip);
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        /* Drop the single preempt count that was kept back above. */
        dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_enable();
#endif
        preempt_check_resched();
}
160
/*
 * Re-enable softirqs, running pending ones if we are the outermost
 * enable outside interrupt context. Our caller's return address is
 * passed on for tracing.
 */
void local_bh_enable(void)
{
        unsigned long caller = (unsigned long)__builtin_return_address(0);

        _local_bh_enable_ip(caller);
}
EXPORT_SYMBOL(local_bh_enable);
166
/*
 * Same as local_bh_enable(), but the address used for tracing is
 * supplied by the caller in @ip instead of being taken from the
 * return address.
 */
void local_bh_enable_ip(unsigned long ip)
{
        _local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
172
173 /*
174  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
175  * and we fall back to softirqd after that.
176  *
177  * This number has been established via experimentation.
178  * The two things to balance is latency against fairness -
179  * we want to handle softirqs as soon as possible, but they
180  * should not be able to lock up the box.
181  */
182 #define MAX_SOFTIRQ_RESTART 10
183
184 asmlinkage void __do_softirq(void)
185 {
186         struct softirq_action *h;
187         __u32 pending;
188         int max_restart = MAX_SOFTIRQ_RESTART;
189         int cpu;
190
191         pending = local_softirq_pending();
192         account_system_vtime(current);
193
194         __local_bh_disable((unsigned long)__builtin_return_address(0));
195         trace_softirq_enter();
196
197         cpu = smp_processor_id();
198 restart:
199         /* Reset the pending bitmask before enabling irqs */
200         set_softirq_pending(0);
201
202         local_irq_enable();
203
204         h = softirq_vec;
205
206         do {
207                 if (pending & 1) {
208                         int prev_count = preempt_count();
209
210                         h->action(h);
211
212                         if (unlikely(prev_count != preempt_count())) {
213                                 printk(KERN_ERR "huh, entered softirq %td %p"
214                                        "with preempt_count %08x,"
215                                        " exited with %08x?\n", h - softirq_vec,
216                                        h->action, prev_count, preempt_count());
217                                 preempt_count() = prev_count;
218                         }
219
220                         rcu_bh_qsctr_inc(cpu);
221                 }
222                 h++;
223                 pending >>= 1;
224         } while (pending);
225
226         local_irq_disable();
227
228         pending = local_softirq_pending();
229         if (pending && --max_restart)
230                 goto restart;
231
232         if (pending)
233                 wakeup_softirqd();
234
235         trace_softirq_exit();
236
237         account_system_vtime(current);
238         _local_bh_enable();
239 }
240
241 #ifndef __ARCH_HAS_DO_SOFTIRQ
242
243 asmlinkage void do_softirq(void)
244 {
245         __u32 pending;
246         unsigned long flags;
247
248         if (in_interrupt())
249                 return;
250
251         local_irq_save(flags);
252
253         pending = local_softirq_pending();
254
255         if (pending)
256                 __do_softirq();
257
258         local_irq_restore(flags);
259 }
260
261 #endif
262
/*
 * Enter an interrupt context.
 *
 * With CONFIG_NO_HZ, an idle CPU additionally calls
 * tick_nohz_stop_idle() before __irq_enter() (only when not already in
 * interrupt context) and tick_nohz_update_jiffies() after it.
 */
void irq_enter(void)
{
#ifdef CONFIG_NO_HZ
        int cpu = smp_processor_id();
        if (idle_cpu(cpu) && !in_interrupt())
                tick_nohz_stop_idle(cpu);
#endif
        __irq_enter();
#ifdef CONFIG_NO_HZ
        if (idle_cpu(cpu))
                tick_nohz_update_jiffies();
#endif
}
279
/*
 * When __ARCH_IRQ_EXIT_IRQS_DISABLED is defined, irq_exit() calls
 * __do_softirq() directly; otherwise it goes through do_softirq(),
 * which saves and disables irqs itself.
 */
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()       __do_softirq()
#else
# define invoke_softirq()       do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
        account_system_vtime(current);
        trace_hardirq_exit();
        sub_preempt_count(IRQ_EXIT_OFFSET);
        /* Only run softirqs when this was the outermost interrupt level. */
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();

#ifdef CONFIG_NO_HZ
        /* Make sure that timer wheel updates are propagated */
        if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
                tick_nohz_stop_sched_tick(0);
        /* Note: rcu_irq_exit() is only called in CONFIG_NO_HZ builds. */
        rcu_irq_exit();
#endif
        preempt_enable_no_resched();
}
305
/*
 * Mark softirq @nr pending on this CPU.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         */
        if (in_interrupt())
                return;

        /*
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        wakeup_softirqd();
}
325
/*
 * Irq-safe wrapper around raise_softirq_irqoff(): disables local irqs
 * for the duration of the call and restores the previous state.
 */
void raise_softirq(unsigned int nr)
{
        unsigned long irqflags;

        local_irq_save(irqflags);
        raise_softirq_irqoff(nr);
        local_irq_restore(irqflags);
}
334
335 void open_softirq(int nr, void (*action)(struct softirq_action *))
336 {
337         softirq_vec[nr].action = action;
338 }
339
340 /* Tasklets */
341 struct tasklet_head
342 {
343         struct tasklet_struct *head;
344         struct tasklet_struct **tail;
345 };
346
347 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
348 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
349
350 void __tasklet_schedule(struct tasklet_struct *t)
351 {
352         unsigned long flags;
353
354         local_irq_save(flags);
355         t->next = NULL;
356         *__get_cpu_var(tasklet_vec).tail = t;
357         __get_cpu_var(tasklet_vec).tail = &(t->next);
358         raise_softirq_irqoff(TASKLET_SOFTIRQ);
359         local_irq_restore(flags);
360 }
361
362 EXPORT_SYMBOL(__tasklet_schedule);
363
364 void __tasklet_hi_schedule(struct tasklet_struct *t)
365 {
366         unsigned long flags;
367
368         local_irq_save(flags);
369         t->next = NULL;
370         *__get_cpu_var(tasklet_hi_vec).tail = t;
371         __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
372         raise_softirq_irqoff(HI_SOFTIRQ);
373         local_irq_restore(flags);
374 }
375
376 EXPORT_SYMBOL(__tasklet_hi_schedule);
377
378 static void tasklet_action(struct softirq_action *a)
379 {
380         struct tasklet_struct *list;
381
382         local_irq_disable();
383         list = __get_cpu_var(tasklet_vec).head;
384         __get_cpu_var(tasklet_vec).head = NULL;
385         __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
386         local_irq_enable();
387
388         while (list) {
389                 struct tasklet_struct *t = list;
390
391                 list = list->next;
392
393                 if (tasklet_trylock(t)) {
394                         if (!atomic_read(&t->count)) {
395                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
396                                         BUG();
397                                 t->func(t->data);
398                                 tasklet_unlock(t);
399                                 continue;
400                         }
401                         tasklet_unlock(t);
402                 }
403
404                 local_irq_disable();
405                 t->next = NULL;
406                 *__get_cpu_var(tasklet_vec).tail = t;
407                 __get_cpu_var(tasklet_vec).tail = &(t->next);
408                 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
409                 local_irq_enable();
410         }
411 }
412
413 static void tasklet_hi_action(struct softirq_action *a)
414 {
415         struct tasklet_struct *list;
416
417         local_irq_disable();
418         list = __get_cpu_var(tasklet_hi_vec).head;
419         __get_cpu_var(tasklet_hi_vec).head = NULL;
420         __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
421         local_irq_enable();
422
423         while (list) {
424                 struct tasklet_struct *t = list;
425
426                 list = list->next;
427
428                 if (tasklet_trylock(t)) {
429                         if (!atomic_read(&t->count)) {
430                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
431                                         BUG();
432                                 t->func(t->data);
433                                 tasklet_unlock(t);
434                                 continue;
435                         }
436                         tasklet_unlock(t);
437                 }
438
439                 local_irq_disable();
440                 t->next = NULL;
441                 *__get_cpu_var(tasklet_hi_vec).tail = t;
442                 __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
443                 __raise_softirq_irqoff(HI_SOFTIRQ);
444                 local_irq_enable();
445         }
446 }
447
448
449 void tasklet_init(struct tasklet_struct *t,
450                   void (*func)(unsigned long), unsigned long data)
451 {
452         t->next = NULL;
453         t->state = 0;
454         atomic_set(&t->count, 0);
455         t->func = func;
456         t->data = data;
457 }
458
459 EXPORT_SYMBOL(tasklet_init);
460
461 void tasklet_kill(struct tasklet_struct *t)
462 {
463         if (in_interrupt())
464                 printk("Attempt to kill tasklet from interrupt\n");
465
466         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
467                 do
468                         yield();
469                 while (test_bit(TASKLET_STATE_SCHED, &t->state));
470         }
471         tasklet_unlock_wait(t);
472         clear_bit(TASKLET_STATE_SCHED, &t->state);
473 }
474
475 EXPORT_SYMBOL(tasklet_kill);
476
477 void __init softirq_init(void)
478 {
479         int cpu;
480
481         for_each_possible_cpu(cpu) {
482                 per_cpu(tasklet_vec, cpu).tail =
483                         &per_cpu(tasklet_vec, cpu).head;
484                 per_cpu(tasklet_hi_vec, cpu).tail =
485                         &per_cpu(tasklet_hi_vec, cpu).head;
486         }
487
488         open_softirq(TASKLET_SOFTIRQ, tasklet_action);
489         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
490 }
491
492 static int ksoftirqd(void * __bind_cpu)
493 {
494         set_current_state(TASK_INTERRUPTIBLE);
495
496         while (!kthread_should_stop()) {
497                 preempt_disable();
498                 if (!local_softirq_pending()) {
499                         preempt_enable_no_resched();
500                         schedule();
501                         preempt_disable();
502                 }
503
504                 __set_current_state(TASK_RUNNING);
505
506                 while (local_softirq_pending()) {
507                         /* Preempt disable stops cpu going offline.
508                            If already offline, we'll be on wrong CPU:
509                            don't process */
510                         if (cpu_is_offline((long)__bind_cpu))
511                                 goto wait_to_die;
512                         do_softirq();
513                         preempt_enable_no_resched();
514                         cond_resched();
515                         preempt_disable();
516                 }
517                 preempt_enable();
518                 set_current_state(TASK_INTERRUPTIBLE);
519         }
520         __set_current_state(TASK_RUNNING);
521         return 0;
522
523 wait_to_die:
524         preempt_enable();
525         /* Wait for kthread_stop */
526         set_current_state(TASK_INTERRUPTIBLE);
527         while (!kthread_should_stop()) {
528                 schedule();
529                 set_current_state(TASK_INTERRUPTIBLE);
530         }
531         __set_current_state(TASK_RUNNING);
532         return 0;
533 }
534
535 #ifdef CONFIG_HOTPLUG_CPU
536 /*
537  * tasklet_kill_immediate is called to remove a tasklet which can already be
538  * scheduled for execution on @cpu.
539  *
540  * Unlike tasklet_kill, this function removes the tasklet
541  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
542  *
543  * When this function is called, @cpu must be in the CPU_DEAD state.
544  */
545 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
546 {
547         struct tasklet_struct **i;
548
549         BUG_ON(cpu_online(cpu));
550         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
551
552         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
553                 return;
554
555         /* CPU is dead, so no lock needed. */
556         for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
557                 if (*i == t) {
558                         *i = t->next;
559                         /* If this was the tail element, move the tail ptr */
560                         if (*i == NULL)
561                                 per_cpu(tasklet_vec, cpu).tail = i;
562                         return;
563                 }
564         }
565         BUG();
566 }
567
568 static void takeover_tasklets(unsigned int cpu)
569 {
570         /* CPU is dead, so no lock needed. */
571         local_irq_disable();
572
573         /* Find end, append list for that CPU. */
574         if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
575                 *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
576                 __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
577                 per_cpu(tasklet_vec, cpu).head = NULL;
578                 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
579         }
580         raise_softirq_irqoff(TASKLET_SOFTIRQ);
581
582         if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
583                 *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
584                 __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
585                 per_cpu(tasklet_hi_vec, cpu).head = NULL;
586                 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
587         }
588         raise_softirq_irqoff(HI_SOFTIRQ);
589
590         local_irq_enable();
591 }
592 #endif /* CONFIG_HOTPLUG_CPU */
593
594 static int __cpuinit cpu_callback(struct notifier_block *nfb,
595                                   unsigned long action,
596                                   void *hcpu)
597 {
598         int hotcpu = (unsigned long)hcpu;
599         struct task_struct *p;
600
601         switch (action) {
602         case CPU_UP_PREPARE:
603         case CPU_UP_PREPARE_FROZEN:
604                 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
605                 if (IS_ERR(p)) {
606                         printk("ksoftirqd for %i failed\n", hotcpu);
607                         return NOTIFY_BAD;
608                 }
609                 kthread_bind(p, hotcpu);
610                 per_cpu(ksoftirqd, hotcpu) = p;
611                 break;
612         case CPU_ONLINE:
613         case CPU_ONLINE_FROZEN:
614                 wake_up_process(per_cpu(ksoftirqd, hotcpu));
615                 break;
616 #ifdef CONFIG_HOTPLUG_CPU
617         case CPU_UP_CANCELED:
618         case CPU_UP_CANCELED_FROZEN:
619                 if (!per_cpu(ksoftirqd, hotcpu))
620                         break;
621                 /* Unbind so it can run.  Fall thru. */
622                 kthread_bind(per_cpu(ksoftirqd, hotcpu),
623                              any_online_cpu(cpu_online_map));
624         case CPU_DEAD:
625         case CPU_DEAD_FROZEN: {
626                 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
627
628                 p = per_cpu(ksoftirqd, hotcpu);
629                 per_cpu(ksoftirqd, hotcpu) = NULL;
630                 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
631                 kthread_stop(p);
632                 takeover_tasklets(hotcpu);
633                 break;
634         }
635 #endif /* CONFIG_HOTPLUG_CPU */
636         }
637         return NOTIFY_OK;
638 }
639
640 static struct notifier_block __cpuinitdata cpu_nfb = {
641         .notifier_call = cpu_callback
642 };
643
644 static __init int spawn_ksoftirqd(void)
645 {
646         void *cpu = (void *)(long)smp_processor_id();
647         int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
648
649         BUG_ON(err == NOTIFY_BAD);
650         cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
651         register_cpu_notifier(&cpu_nfb);
652         return 0;
653 }
654 early_initcall(spawn_ksoftirqd);
655
656 #ifdef CONFIG_SMP
/*
 * Call a function on all processors.
 *
 * @func is first requested on the other CPUs through smp_call_function()
 * (@wait is passed through unchanged), then invoked on the calling CPU
 * with local irqs disabled. The caller's irq state is restored afterwards
 * rather than unconditionally enabled, so calling this with irqs off
 * does not turn them on behind the caller's back.
 *
 * Returns the result of smp_call_function().
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
        unsigned long flags;
        int ret;

        preempt_disable();
        ret = smp_call_function(func, info, wait);
        local_irq_save(flags);
        func(info);
        local_irq_restore(flags);
        preempt_enable();
        return ret;
}
EXPORT_SYMBOL(on_each_cpu);
673 #endif