cgroups: revamp subsys array

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 14e1a14f94d20538f269049642d8a455f6dfb9ef..7c1a67ef027431e8f82d341ed7e729b9c0eec044 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -3,7 +3,11 @@
  *
  *     Copyright (C) 1992 Linus Torvalds
  *
- * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ *     Distribute under GPLv2.
+ *
+ *     Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ *
+ *     Remote softirq infrastructure is by Jens Axboe.
  */
 
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
+#include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/rcupdate.h>
+#include <linux/ftrace.h>
 #include <linux/smp.h>
+#include <linux/tick.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/irq.h>
 
 #include <asm/irq.h>
 /*
@@ -42,17 +52,22 @@ irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
 EXPORT_SYMBOL(irq_stat);
 #endif
 
-static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
+static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
 
+char *softirq_to_name[NR_SOFTIRQS] = {
+       "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
+       "TASKLET", "SCHED", "HRTIMER",  "RCU"
+};
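
The name table pairs a human-readable label with each slot of softirq_vec.
A consumer such as the /proc/softirqs seq_file can index it directly; a
minimal sketch of that pattern (illustrative only; the real show_softirqs()
lives in fs/proc/softirqs.c, and kstat_softirqs_cpu() comes from
<linux/kernel_stat.h>):

	/* Sketch: one row per softirq, one column per possible CPU. */
	static int show_softirqs_sketch(struct seq_file *p, void *v)
	{
		int i, j;

		for (i = 0; i < NR_SOFTIRQS; i++) {
			seq_printf(p, "%8s:", softirq_to_name[i]);
			for_each_possible_cpu(j)
				seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
			seq_printf(p, "\n");
		}
		return 0;
	}
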
+
 /*
  * we cannot loop indefinitely here, to avoid userspace starvation,
  * but we also don't want to introduce a worst-case 1/HZ latency
  * to the pending events, so let the scheduler balance
  * the softirq load for us.
  */
-static inline void wakeup_softirqd(void)
+void wakeup_softirqd(void)
 {
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);
@@ -73,13 +88,23 @@ static void __local_bh_disable(unsigned long ip)
        WARN_ON_ONCE(in_irq());
 
        raw_local_irq_save(flags);
-       add_preempt_count(SOFTIRQ_OFFSET);
+       /*
+        * The preempt tracer hooks into add_preempt_count and will break
+        * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
+        * is set and before current->softirq_enabled is cleared.
+        * We must manually increment preempt_count here and manually
+        * call trace_preempt_off() later.
+        */
+       preempt_count() += SOFTIRQ_OFFSET;
        /*
         * Were softirqs turned off above:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_off(ip);
        raw_local_irq_restore(flags);
+
+       if (preempt_count() == SOFTIRQ_OFFSET)
+               trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 }
 #else /* !CONFIG_TRACE_IRQFLAGS */
 static inline void __local_bh_disable(unsigned long ip)
@@ -96,20 +121,6 @@ void local_bh_disable(void)
 
 EXPORT_SYMBOL(local_bh_disable);
 
-void __local_bh_enable(void)
-{
-       WARN_ON_ONCE(in_irq());
-
-       /*
-        * softirqs should never be enabled by __local_bh_enable(),
-        * it always nests inside local_bh_enable() sections:
-        */
-       WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
-
-       sub_preempt_count(SOFTIRQ_OFFSET);
-}
-EXPORT_SYMBOL_GPL(__local_bh_enable);
-
 /*
  * Special-case - softirqs can safely be enabled in
  * cond_resched_softirq(), or by __do_softirq(),
@@ -127,23 +138,17 @@ void _local_bh_enable(void)
 
 EXPORT_SYMBOL(_local_bh_enable);
 
-void local_bh_enable(void)
+static inline void _local_bh_enable_ip(unsigned long ip)
 {
+       WARN_ON_ONCE(in_irq() || irqs_disabled());
 #ifdef CONFIG_TRACE_IRQFLAGS
-       unsigned long flags;
-
-       WARN_ON_ONCE(in_irq());
-#endif
-       WARN_ON_ONCE(irqs_disabled());
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-       local_irq_save(flags);
+       local_irq_disable();
 #endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
-               trace_softirqs_on((unsigned long)__builtin_return_address(0));
+               trace_softirqs_on(ip);
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
@@ -155,40 +160,20 @@ void local_bh_enable(void)
 
        dec_preempt_count();
 #ifdef CONFIG_TRACE_IRQFLAGS
-       local_irq_restore(flags);
+       local_irq_enable();
 #endif
        preempt_check_resched();
 }
+
+void local_bh_enable(void)
+{
+       _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
+}
 EXPORT_SYMBOL(local_bh_enable);
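
For reference, the caller-side pattern these helpers serve: closing a race
between process context and a softirq over per-CPU data. A hedged sketch
(my_packet_count and account_packet() are hypothetical):

	static DEFINE_PER_CPU(unsigned long, my_packet_count);

	static void account_packet(void)
	{
		local_bh_disable();	/* keep e.g. NET_RX off this CPU */
		__get_cpu_var(my_packet_count)++;
		local_bh_enable();	/* may run pending softirqs on the way out */
	}
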
 
 void local_bh_enable_ip(unsigned long ip)
 {
-#ifdef CONFIG_TRACE_IRQFLAGS
-       unsigned long flags;
-
-       WARN_ON_ONCE(in_irq());
-
-       local_irq_save(flags);
-#endif
-       /*
-        * Are softirqs going to be turned on now:
-        */
-       if (softirq_count() == SOFTIRQ_OFFSET)
-               trace_softirqs_on(ip);
-       /*
-        * Keep preemption disabled until we are done with
-        * softirq processing:
-        */
-       sub_preempt_count(SOFTIRQ_OFFSET - 1);
-
-       if (unlikely(!in_interrupt() && local_softirq_pending()))
-               do_softirq();
-
-       dec_preempt_count();
-#ifdef CONFIG_TRACE_IRQFLAGS
-       local_irq_restore(flags);
-#endif
-       preempt_check_resched();
+       _local_bh_enable_ip(ip);
 }
 EXPORT_SYMBOL(local_bh_enable_ip);
 
@@ -214,7 +199,7 @@ asmlinkage void __do_softirq(void)
        account_system_vtime(current);
 
        __local_bh_disable((unsigned long)__builtin_return_address(0));
-       trace_softirq_enter();
+       lockdep_softirq_enter();
 
        cpu = smp_processor_id();
 restart:
@@ -227,8 +212,22 @@ restart:
 
        do {
                if (pending & 1) {
+                       int prev_count = preempt_count();
+                       kstat_incr_softirqs_this_cpu(h - softirq_vec);
+
+                       trace_softirq_entry(h, softirq_vec);
                        h->action(h);
-                       rcu_bh_qsctr_inc(cpu);
+                       trace_softirq_exit(h, softirq_vec);
+                       if (unlikely(prev_count != preempt_count())) {
+                               printk(KERN_ERR "huh, entered softirq %td %s %p "
+                                      "with preempt_count %08x,"
+                                      " exited with %08x?\n", h - softirq_vec,
+                                      softirq_to_name[h - softirq_vec],
+                                      h->action, prev_count, preempt_count());
+                               preempt_count() = prev_count;
+                       }
+
+                       rcu_bh_qs(cpu);
                }
                h++;
                pending >>= 1;
@@ -243,7 +242,7 @@ restart:
        if (pending)
                wakeup_softirqd();
 
-       trace_softirq_exit();
+       lockdep_softirq_exit();
 
        account_system_vtime(current);
        _local_bh_enable();
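
To make the new preempt_count check concrete: a handler like the
hypothetical one below returns with a lock still held, so preempt_count()
is one level too high when __do_softirq() rechecks it after h->action(h);
the check prints the diagnostic and forces the count back to prev_count.

	static DEFINE_SPINLOCK(demo_lock);

	/* Hypothetical buggy handler, not in the tree. */
	static void leaky_action(struct softirq_action *h)
	{
		spin_lock(&demo_lock);
		/* ... work ... */
		/* BUG: missing spin_unlock(&demo_lock); */
	}
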
@@ -269,8 +268,6 @@ asmlinkage void do_softirq(void)
        local_irq_restore(flags);
 }
 
-EXPORT_SYMBOL(do_softirq);
-
 #endif
 
 /*
@@ -278,9 +275,14 @@ EXPORT_SYMBOL(do_softirq);
  */
 void irq_enter(void)
 {
-       account_system_vtime(current);
-       add_preempt_count(HARDIRQ_OFFSET);
-       trace_hardirq_enter();
+       int cpu = smp_processor_id();
+
+       rcu_irq_enter();
+       if (idle_cpu(cpu) && !in_interrupt()) {
+               __irq_enter();
+               tick_check_idle(cpu);
+       } else
+               __irq_enter();
 }
 
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
@@ -299,13 +301,20 @@ void irq_exit(void)
        sub_preempt_count(IRQ_EXIT_OFFSET);
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();
+
+       rcu_irq_exit();
+#ifdef CONFIG_NO_HZ
+       /* Make sure that timer wheel updates are propagated */
+       if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
+               tick_nohz_stop_sched_tick(0);
+#endif
        preempt_enable_no_resched();
 }
 
 /*
  * This function must run with irqs disabled!
  */
-inline fastcall void raise_softirq_irqoff(unsigned int nr)
+inline void raise_softirq_irqoff(unsigned int nr)
 {
        __raise_softirq_irqoff(nr);
 
@@ -322,9 +331,7 @@ inline fastcall void raise_softirq_irqoff(unsigned int nr)
                wakeup_softirqd();
 }
 
-EXPORT_SYMBOL(raise_softirq_irqoff);
-
-void fastcall raise_softirq(unsigned int nr)
+void raise_softirq(unsigned int nr)
 {
        unsigned long flags;
 
@@ -333,56 +340,70 @@ void fastcall raise_softirq(unsigned int nr)
        local_irq_restore(flags);
 }
 
-void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+void open_softirq(int nr, void (*action)(struct softirq_action *))
 {
-       softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
 }
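
With the data cookie gone, registration is just a vector index and a
handler; any context must come from per-CPU or global state instead of
softirq_action.data. A hedged sketch (MY_SOFTIRQ and my_action are
hypothetical stand-ins; real vectors are the fixed NR_SOFTIRQS enum slots
in <linux/interrupt.h>):

	/* Handler under the new one-argument signature. */
	static void my_action(struct softirq_action *h)
	{
		/* context comes from per-CPU or global state, not h->data */
	}

	static int __init my_init(void)
	{
		open_softirq(MY_SOFTIRQ, my_action);
		raise_softirq(MY_SOFTIRQ);	/* pending; runs on irq exit or in ksoftirqd */
		return 0;
	}
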
 
-/* Tasklets */
+/*
+ * Tasklets
+ */
 struct tasklet_head
 {
-       struct tasklet_struct *list;
+       struct tasklet_struct *head;
+       struct tasklet_struct **tail;
 };
 
-/* Some compilers disobey section attribute on statics when not
-   initialized -- RR */
-static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
-static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
+static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
+static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
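
The head/tail pair turns the old LIFO push ("t->next = head; head = t")
into a FIFO queue with O(1) append that preserves scheduling order. The
invariant: tail always points at the NULL link to overwrite next, so an
empty queue is head == NULL with tail == &head (established in
softirq_init() below). The idiom in isolation, as a sketch:

	struct node { struct node *next; };
	struct queue { struct node *head, **tail; };

	static void enqueue(struct queue *q, struct node *n)
	{
		n->next = NULL;
		*q->tail = n;		/* link after the current last element */
		q->tail = &n->next;	/* the new terminating link */
	}
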
 
-void fastcall __tasklet_schedule(struct tasklet_struct *t)
+void __tasklet_schedule(struct tasklet_struct *t)
 {
        unsigned long flags;
 
        local_irq_save(flags);
-       t->next = __get_cpu_var(tasklet_vec).list;
-       __get_cpu_var(tasklet_vec).list = t;
+       t->next = NULL;
+       *__get_cpu_var(tasklet_vec).tail = t;
+       __get_cpu_var(tasklet_vec).tail = &(t->next);
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
 }
 
 EXPORT_SYMBOL(__tasklet_schedule);
 
-void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
+void __tasklet_hi_schedule(struct tasklet_struct *t)
 {
        unsigned long flags;
 
        local_irq_save(flags);
-       t->next = __get_cpu_var(tasklet_hi_vec).list;
-       __get_cpu_var(tasklet_hi_vec).list = t;
+       t->next = NULL;
+       *__get_cpu_var(tasklet_hi_vec).tail = t;
+       __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
 }
 
 EXPORT_SYMBOL(__tasklet_hi_schedule);
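
Drivers do not call these __ helpers directly; the usual path is
DECLARE_TASKLET() plus tasklet_schedule() from the interrupt handler.
An illustrative sketch (my_tasklet_fn and my_dev_isr are hypothetical):

	static void my_tasklet_fn(unsigned long data)
	{
		/* deferred work; runs in softirq context with irqs enabled */
	}

	static DECLARE_TASKLET(my_tasklet, my_tasklet_fn, 0);

	static irqreturn_t my_dev_isr(int irq, void *dev_id)
	{
		/* ack the hardware quickly, defer the rest */
		tasklet_schedule(&my_tasklet);	/* lands in __tasklet_schedule() */
		return IRQ_HANDLED;
	}
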
 
+void __tasklet_hi_schedule_first(struct tasklet_struct *t)
+{
+       BUG_ON(!irqs_disabled());
+
+       t->next = __get_cpu_var(tasklet_hi_vec).head;
+       __get_cpu_var(tasklet_hi_vec).head = t;
+       __raise_softirq_irqoff(HI_SOFTIRQ);
+}
+
+EXPORT_SYMBOL(__tasklet_hi_schedule_first);
+
 static void tasklet_action(struct softirq_action *a)
 {
        struct tasklet_struct *list;
 
        local_irq_disable();
-       list = __get_cpu_var(tasklet_vec).list;
-       __get_cpu_var(tasklet_vec).list = NULL;
+       list = __get_cpu_var(tasklet_vec).head;
+       __get_cpu_var(tasklet_vec).head = NULL;
+       __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
        local_irq_enable();
 
        while (list) {
@@ -402,8 +423,9 @@ static void tasklet_action(struct softirq_action *a)
                }
 
                local_irq_disable();
-               t->next = __get_cpu_var(tasklet_vec).list;
-               __get_cpu_var(tasklet_vec).list = t;
+               t->next = NULL;
+               *__get_cpu_var(tasklet_vec).tail = t;
+               __get_cpu_var(tasklet_vec).tail = &(t->next);
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
@@ -414,8 +436,9 @@ static void tasklet_hi_action(struct softirq_action *a)
        struct tasklet_struct *list;
 
        local_irq_disable();
-       list = __get_cpu_var(tasklet_hi_vec).list;
-       __get_cpu_var(tasklet_hi_vec).list = NULL;
+       list = __get_cpu_var(tasklet_hi_vec).head;
+       __get_cpu_var(tasklet_hi_vec).head = NULL;
+       __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
        local_irq_enable();
 
        while (list) {
@@ -435,8 +458,9 @@ static void tasklet_hi_action(struct softirq_action *a)
                }
 
                local_irq_disable();
-               t->next = __get_cpu_var(tasklet_hi_vec).list;
-               __get_cpu_var(tasklet_hi_vec).list = t;
+               t->next = NULL;
+               *__get_cpu_var(tasklet_hi_vec).tail = t;
+               __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
@@ -461,9 +485,9 @@ void tasklet_kill(struct tasklet_struct *t)
                printk("Attempt to kill tasklet from interrupt\n");
 
        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
-               do
+               do {
                        yield();
-               while (test_bit(TASKLET_STATE_SCHED, &t->state));
+               } while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
@@ -471,17 +495,205 @@ void tasklet_kill(struct tasklet_struct *t)
 
 EXPORT_SYMBOL(tasklet_kill);
 
-void __init softirq_init(void)
+/*
+ * tasklet_hrtimer
+ */
+
+/*
+ * The trampoline is called when the hrtimer expires. It schedules a tasklet
+ * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
+ * hrtimer callback, but from softirq context.
+ */
+static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
+{
+       struct tasklet_hrtimer *ttimer =
+               container_of(timer, struct tasklet_hrtimer, timer);
+
+       tasklet_hi_schedule(&ttimer->tasklet);
+       return HRTIMER_NORESTART;
+}
+
+/*
+ * Helper function which calls the hrtimer callback from
+ * tasklet/softirq context
+ */
+static void __tasklet_hrtimer_trampoline(unsigned long data)
+{
+       struct tasklet_hrtimer *ttimer = (void *)data;
+       enum hrtimer_restart restart;
+
+       restart = ttimer->function(&ttimer->timer);
+       if (restart != HRTIMER_NORESTART)
+               hrtimer_restart(&ttimer->timer);
+}
+
+/**
+ * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
+ * @ttimer:     tasklet_hrtimer which is initialized
+ * @function:   hrtimer callback function which gets called from softirq context
+ * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
+ * @mode:       hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
+ */
+void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
+                         enum hrtimer_restart (*function)(struct hrtimer *),
+                         clockid_t which_clock, enum hrtimer_mode mode)
+{
+       hrtimer_init(&ttimer->timer, which_clock, mode);
+       ttimer->timer.function = __hrtimer_tasklet_trampoline;
+       tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
+                    (unsigned long)ttimer);
+       ttimer->function = function;
+}
+EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
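
A plausible consumer, sketched under the assumption that the companion
tasklet_hrtimer_start() helper from <linux/interrupt.h> is used to arm
the timer (my_timer_fn and my_th are hypothetical):

	static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
	{
		/* runs from HI_SOFTIRQ context via the two trampolines above */
		return HRTIMER_NORESTART;
	}

	static struct tasklet_hrtimer my_th;

	static void my_setup(void)
	{
		tasklet_hrtimer_init(&my_th, my_timer_fn,
				     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		tasklet_hrtimer_start(&my_th, ktime_set(0, 10 * NSEC_PER_MSEC),
				      HRTIMER_MODE_REL);
	}
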
+
+/*
+ * Remote softirq bits
+ */
+
+DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+EXPORT_PER_CPU_SYMBOL(softirq_work_list);
+
+static void __local_trigger(struct call_single_data *cp, int softirq)
 {
-       open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
-       open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+       struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
+
+       list_add_tail(&cp->list, head);
+
+       /* Trigger the softirq only if the list was previously empty.  */
+       if (head->next == &cp->list)
+               raise_softirq_irqoff(softirq);
 }
 
-static int ksoftirqd(void * __bind_cpu)
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
+static void remote_softirq_receive(void *data)
 {
-       set_user_nice(current, 19);
-       current->flags |= PF_NOFREEZE;
+       struct call_single_data *cp = data;
+       unsigned long flags;
+       int softirq;
+
+       softirq = cp->priv;
+
+       local_irq_save(flags);
+       __local_trigger(cp, softirq);
+       local_irq_restore(flags);
+}
 
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+       if (cpu_online(cpu)) {
+               cp->func = remote_softirq_receive;
+               cp->info = cp;
+               cp->flags = 0;
+               cp->priv = softirq;
+
+               __smp_call_function_single(cpu, cp, 0);
+               return 0;
+       }
+       return 1;
+}
+#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+       return 1;
+}
+#endif
+
+/**
+ * __send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @this_cpu: the currently executing cpu
+ * @softirq: the softirq for the work
+ *
+ * Attempt to schedule softirq work on a remote cpu.  If this cannot be
+ * done, the work is instead queued up on the local cpu.
+ *
+ * Interrupts must be disabled.
+ */
+void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
+{
+       if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
+               __local_trigger(cp, softirq);
+}
+EXPORT_SYMBOL(__send_remote_softirq);
+
+/**
+ * send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @softirq: the softirq for the work
+ *
+ * Like __send_remote_softirq except that disabling interrupts and
+ * computing the current cpu is done for the caller.
+ */
+void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+       unsigned long flags;
+       int this_cpu;
+
+       local_irq_save(flags);
+       this_cpu = smp_processor_id();
+       __send_remote_softirq(cp, cpu, this_cpu, softirq);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(send_remote_softirq);
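
A hedged sketch of the consumer side (every name here is hypothetical):
the caller embeds the call_single_data in its work item, and the softirq
action on the destination CPU drains the matching softirq_work_list,
mapping each node back with container_of():

	struct my_work {
		struct call_single_data csd;	/* csd.list is what __local_trigger() queues */
		/* ... payload ... */
	};

	static void queue_to_cpu(struct my_work *w, int cpu)
	{
		send_remote_softirq(&w->csd, cpu, MY_SOFTIRQ);
	}

	/* The softirq action on the CPU the work landed on. */
	static void my_remote_action(struct softirq_action *h)
	{
		LIST_HEAD(local);
		struct call_single_data *cp, *tmp;

		local_irq_disable();
		list_splice_init(&__get_cpu_var(softirq_work_list[MY_SOFTIRQ]), &local);
		local_irq_enable();

		list_for_each_entry_safe(cp, tmp, &local, list) {
			struct my_work *w = container_of(cp, struct my_work, csd);

			list_del(&cp->list);
			/* ... complete w ... */
		}
	}
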
+
+static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
+                                              unsigned long action, void *hcpu)
+{
+       /*
+        * If a CPU goes away, splice its entries to the current CPU
+        * and trigger a run of the softirq
+        */
+       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+               int cpu = (unsigned long) hcpu;
+               int i;
+
+               local_irq_disable();
+               for (i = 0; i < NR_SOFTIRQS; i++) {
+                       struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
+                       struct list_head *local_head;
+
+                       if (list_empty(head))
+                               continue;
+
+                       local_head = &__get_cpu_var(softirq_work_list[i]);
+                       list_splice_init(head, local_head);
+                       raise_softirq_irqoff(i);
+               }
+               local_irq_enable();
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
+       .notifier_call  = remote_softirq_cpu_notify,
+};
+
+void __init softirq_init(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               int i;
+
+               per_cpu(tasklet_vec, cpu).tail =
+                       &per_cpu(tasklet_vec, cpu).head;
+               per_cpu(tasklet_hi_vec, cpu).tail =
+                       &per_cpu(tasklet_hi_vec, cpu).head;
+               for (i = 0; i < NR_SOFTIRQS; i++)
+                       INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
+       }
+
+       register_hotcpu_notifier(&remote_softirq_cpu_notifier);
+
+       open_softirq(TASKLET_SOFTIRQ, tasklet_action);
+       open_softirq(HI_SOFTIRQ, tasklet_hi_action);
+}
+
+static int run_ksoftirqd(void * __bind_cpu)
+{
        set_current_state(TASK_INTERRUPTIBLE);
 
        while (!kthread_should_stop()) {
@@ -504,6 +716,7 @@ static int ksoftirqd(void * __bind_cpu)
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
+                       rcu_sched_qs((long)__bind_cpu);
                }
                preempt_enable();
                set_current_state(TASK_INTERRUPTIBLE);
@@ -544,9 +757,12 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
                return;
 
        /* CPU is dead, so no lock needed. */
-       for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
+       for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
+                       /* If this was the tail element, move the tail ptr */
+                       if (*i == NULL)
+                               per_cpu(tasklet_vec, cpu).tail = i;
                        return;
                }
        }
@@ -555,20 +771,24 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
 
 static void takeover_tasklets(unsigned int cpu)
 {
-       struct tasklet_struct **i;
-
        /* CPU is dead, so no lock needed. */
        local_irq_disable();
 
        /* Find end, append list for that CPU. */
-       for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
-       *i = per_cpu(tasklet_vec, cpu).list;
-       per_cpu(tasklet_vec, cpu).list = NULL;
+       if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
+               *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
+               __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
+               per_cpu(tasklet_vec, cpu).head = NULL;
+               per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
+       }
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
 
-       for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
-       *i = per_cpu(tasklet_hi_vec, cpu).list;
-       per_cpu(tasklet_hi_vec, cpu).list = NULL;
+       if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
+               *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
+               __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
+               per_cpu(tasklet_hi_vec, cpu).head = NULL;
+               per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
+       }
        raise_softirq_irqoff(HI_SOFTIRQ);
 
        local_irq_enable();
@@ -584,7 +804,8 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
 
        switch (action) {
        case CPU_UP_PREPARE:
-               p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
+       case CPU_UP_PREPARE_FROZEN:
+               p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
@@ -593,21 +814,28 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
+       case CPU_ONLINE_FROZEN:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
 #ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
+       case CPU_UP_CANCELED_FROZEN:
                if (!per_cpu(ksoftirqd, hotcpu))
                        break;
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu),
-                            any_online_cpu(cpu_online_map));
+                            cpumask_any(cpu_online_mask));
        case CPU_DEAD:
+       case CPU_DEAD_FROZEN: {
+               struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
+               sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
+       }
 #endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
@@ -617,7 +845,7 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
 };
 
-__init int spawn_ksoftirqd(void)
+static __init int spawn_ksoftirqd(void)
 {
        void *cpu = (void *)(long)smp_processor_id();
        int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
@@ -627,17 +855,18 @@ __init int spawn_ksoftirqd(void)
        register_cpu_notifier(&cpu_nfb);
        return 0;
 }
+early_initcall(spawn_ksoftirqd);
 
 #ifdef CONFIG_SMP
 /*
  * Call a function on all processors
  */
-int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
+int on_each_cpu(void (*func) (void *info), void *info, int wait)
 {
        int ret = 0;
 
        preempt_disable();
-       ret = smp_call_function(func, info, retry, wait);
+       ret = smp_call_function(func, info, wait);
        local_irq_disable();
        func(info);
        local_irq_enable();
@@ -646,3 +875,28 @@ int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
 }
 EXPORT_SYMBOL(on_each_cpu);
 #endif
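
With the retry argument dropped, every call site shrinks to
(func, info, wait). A hedged example (bump_counter, bump_all and
my_counter are hypothetical):

	static DEFINE_PER_CPU(unsigned long, my_counter);

	static void bump_counter(void *unused)
	{
		__get_cpu_var(my_counter)++;	/* runs with irqs off on each CPU */
	}

	static void bump_all(void)
	{
		/* wait == 1: block until every CPU has run the function */
		on_each_cpu(bump_counter, NULL, 1);
	}
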
+
+/*
+ * [ These __weak aliases are kept in a separate compilation unit, so that
+ *   GCC does not inline them incorrectly. ]
+ */
+
+int __init __weak early_irq_init(void)
+{
+       return 0;
+}
+
+int __init __weak arch_probe_nr_irqs(void)
+{
+       return 0;
+}
+
+int __init __weak arch_early_irq_init(void)
+{
+       return 0;
+}
+
+int __weak arch_init_chip_data(struct irq_desc *desc, int node)
+{
+       return 0;
+}