]> nv-tegra.nvidia Code Review - linux-3.10.git/blobdiff - kernel/irq/manage.c
Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-3.10.git] / kernel / irq / manage.c
index 99395a24f432d2bea80b2294571b903590d38664..4c69326aa773f6469b7170d66814d7480ea12ea7 100644 (file)
@@ -7,6 +7,8 @@
  * This file contains driver APIs to the irq subsystem.
  */
 
+#define pr_fmt(fmt) "genirq: " fmt
+
 #include <linux/irq.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/task_work.h>
 
 #include "internals.h"
 
+#ifdef CONFIG_IRQ_FORCED_THREADING
+__read_mostly bool force_irqthreads;
+
+static int __init setup_forced_irqthreads(char *arg)
+{
+       force_irqthreads = true;
+       return 0;
+}
+early_param("threadirqs", setup_forced_irqthreads);
+#endif
+
 /**
  *     synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  *     @irq: interrupt number to wait for
@@ -30,7 +44,7 @@
 void synchronize_irq(unsigned int irq)
 {
        struct irq_desc *desc = irq_to_desc(irq);
-       unsigned int state;
+       bool inprogress;
 
        if (!desc)
                return;
@@ -42,16 +56,16 @@ void synchronize_irq(unsigned int irq)
                 * Wait until we're out of the critical section.  This might
                 * give the wrong answer due to the lack of memory barriers.
                 */
-               while (desc->istate & IRQS_INPROGRESS)
+               while (irqd_irq_inprogress(&desc->irq_data))
                        cpu_relax();
 
                /* Ok, that indicated we're done: double-check carefully. */
                raw_spin_lock_irqsave(&desc->lock, flags);
-               state = desc->istate;
+               inprogress = irqd_irq_inprogress(&desc->irq_data);
                raw_spin_unlock_irqrestore(&desc->lock, flags);
 
                /* Oops, that failed? */
-       } while (state & IRQS_INPROGRESS);
+       } while (inprogress);
 
        /*
         * We made sure that no hardirq handler is running. Now verify
@@ -101,13 +115,13 @@ void irq_set_thread_affinity(struct irq_desc *desc)
 }
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-static inline bool irq_can_move_pcntxt(struct irq_desc *desc)
+static inline bool irq_can_move_pcntxt(struct irq_data *data)
 {
-       return irq_settings_can_move_pcntxt(desc);
+       return irqd_can_move_in_process_context(data);
 }
-static inline bool irq_move_pending(struct irq_desc *desc)
+static inline bool irq_move_pending(struct irq_data *data)
 {
-       return irqd_is_setaffinity_pending(&desc->irq_data);
+       return irqd_is_setaffinity_pending(data);
 }
 static inline void
 irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
@@ -120,43 +134,46 @@ irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
        cpumask_copy(mask, desc->pending_mask);
 }
 #else
-static inline bool irq_can_move_pcntxt(struct irq_desc *desc) { return true; }
-static inline bool irq_move_pending(struct irq_desc *desc) { return false; }
+static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; }
+static inline bool irq_move_pending(struct irq_data *data) { return false; }
 static inline void
 irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { }
 static inline void
 irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
 #endif
 
-/**
- *     irq_set_affinity - Set the irq affinity of a given irq
- *     @irq:           Interrupt to set affinity
- *     @cpumask:       cpumask
- *
- */
-int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
+int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
+                       bool force)
 {
-       struct irq_desc *desc = irq_to_desc(irq);
-       struct irq_chip *chip = desc->irq_data.chip;
-       unsigned long flags;
+       struct irq_desc *desc = irq_data_to_desc(data);
+       struct irq_chip *chip = irq_data_get_irq_chip(data);
+       int ret;
+
+       ret = chip->irq_set_affinity(data, mask, force); /* honour @force */
+       switch (ret) {
+       case IRQ_SET_MASK_OK:
+               cpumask_copy(data->affinity, mask);     /* fall through */
+       case IRQ_SET_MASK_OK_NOCOPY:
+               irq_set_thread_affinity(desc);
+               ret = 0;
+       }
+
+       return ret;
+}
+
+int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
+{
+       struct irq_chip *chip = irq_data_get_irq_chip(data);
+       struct irq_desc *desc = irq_data_to_desc(data);
        int ret = 0;
 
-       if (!chip->irq_set_affinity)
+       if (!chip || !chip->irq_set_affinity)
                return -EINVAL;
 
-       raw_spin_lock_irqsave(&desc->lock, flags);
-
-       if (irq_can_move_pcntxt(desc)) {
-               ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
-               switch (ret) {
-               case IRQ_SET_MASK_OK:
-                       cpumask_copy(desc->irq_data.affinity, mask);
-               case IRQ_SET_MASK_OK_NOCOPY:
-                       irq_set_thread_affinity(desc);
-                       ret = 0;
-               }
+       if (irq_can_move_pcntxt(data)) {
+               ret = irq_do_set_affinity(data, mask, false);
        } else {
-               irqd_set_move_pending(&desc->irq_data);
+               irqd_set_move_pending(data);
                irq_copy_pending(desc, mask);
        }
 
@@ -164,24 +181,41 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
                kref_get(&desc->affinity_notify->kref);
                schedule_work(&desc->affinity_notify->work);
        }
-       irq_compat_set_affinity(desc);
-       irqd_set(&desc->irq_data, IRQD_AFFINITY_SET);
-       raw_spin_unlock_irqrestore(&desc->lock, flags);
+       irqd_set(data, IRQD_AFFINITY_SET);
+
        return ret;
 }
 
-int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
+/**
+ *     irq_set_affinity - Set the irq affinity of a given irq
+ *     @irq:           Interrupt to set affinity
+ *     @mask:          cpumask
+ *
+ */
+int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
+       int ret;
 
        if (!desc)
                return -EINVAL;
 
        raw_spin_lock_irqsave(&desc->lock, flags);
-       desc->affinity_hint = m;
+       ret =  __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask);
        raw_spin_unlock_irqrestore(&desc->lock, flags);
+       return ret;
+}
 
+int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
+{
+       unsigned long flags;
+       struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+
+       if (!desc)
+               return -EINVAL;
+       desc->affinity_hint = m;
+       irq_put_desc_unlock(desc, flags);
        return 0;
 }
 EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
@@ -198,7 +232,7 @@ static void irq_affinity_notify(struct work_struct *work)
                goto out;
 
        raw_spin_lock_irqsave(&desc->lock, flags);
-       if (irq_move_pending(desc))
+       if (irq_move_pending(&desc->irq_data))
                irq_get_pending(cpumask, desc);
        else
                cpumask_copy(cpumask, desc->irq_data.affinity);
@@ -261,9 +295,8 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
 static int
 setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
 {
-       struct irq_chip *chip = irq_desc_get_chip(desc);
        struct cpumask *set = irq_default_affinity;
-       int ret;
+       int node = desc->irq_data.node;
 
        /* Excludes PER_CPU and NO_BALANCE interrupts */
        if (!irq_can_set_affinity(irq))
@@ -277,20 +310,19 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
                if (cpumask_intersects(desc->irq_data.affinity,
                                       cpu_online_mask))
                        set = desc->irq_data.affinity;
-               else {
-                       irq_compat_clr_affinity(desc);
+               else
                        irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
-               }
        }
 
        cpumask_and(mask, cpu_online_mask, set);
-       ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
-       switch (ret) {
-       case IRQ_SET_MASK_OK:
-               cpumask_copy(desc->irq_data.affinity, mask);
-       case IRQ_SET_MASK_OK_NOCOPY:
-               irq_set_thread_affinity(desc);
+       if (node != NUMA_NO_NODE) {
+               const struct cpumask *nodemask = cpumask_of_node(node);
+
+               /* make sure at least one of the cpus in nodemask is online */
+               if (cpumask_intersects(mask, nodemask))
+                       cpumask_and(mask, mask, nodemask);
        }
+       irq_do_set_affinity(&desc->irq_data, mask, false);
        return 0;
 }
 #else
@@ -336,6 +368,18 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
                irq_disable(desc);
 }
 
+static int __disable_irq_nosync(unsigned int irq)
+{
+       unsigned long flags;
+       struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+
+       if (!desc)
+               return -EINVAL;
+       __disable_irq(desc, irq, false);
+       irq_put_desc_busunlock(desc, flags);
+       return 0;
+}
+
 /**
  *     disable_irq_nosync - disable an irq without waiting
  *     @irq: Interrupt to disable
@@ -349,17 +393,7 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
  */
 void disable_irq_nosync(unsigned int irq)
 {
-       struct irq_desc *desc = irq_to_desc(irq);
-       unsigned long flags;
-
-       if (!desc)
-               return;
-
-       chip_bus_lock(desc);
-       raw_spin_lock_irqsave(&desc->lock, flags);
-       __disable_irq(desc, irq, false);
-       raw_spin_unlock_irqrestore(&desc->lock, flags);
-       chip_bus_sync_unlock(desc);
+       __disable_irq_nosync(irq);
 }
 EXPORT_SYMBOL(disable_irq_nosync);
 
@@ -377,13 +411,7 @@ EXPORT_SYMBOL(disable_irq_nosync);
  */
 void disable_irq(unsigned int irq)
 {
-       struct irq_desc *desc = irq_to_desc(irq);
-
-       if (!desc)
-               return;
-
-       disable_irq_nosync(irq);
-       if (desc->action)
+       if (!__disable_irq_nosync(irq))
                synchronize_irq(irq);
 }
 EXPORT_SYMBOL(disable_irq);
@@ -434,21 +462,18 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
  */
 void enable_irq(unsigned int irq)
 {
-       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
+       struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
 
        if (!desc)
                return;
-
        if (WARN(!desc->irq_data.chip,
                 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
-               return;
+               goto out;
 
-       chip_bus_lock(desc);
-       raw_spin_lock_irqsave(&desc->lock, flags);
        __enable_irq(desc, irq, false);
-       raw_spin_unlock_irqrestore(&desc->lock, flags);
-       chip_bus_sync_unlock(desc);
+out:
+       irq_put_desc_busunlock(desc, flags);
 }
 EXPORT_SYMBOL(enable_irq);
 
@@ -457,6 +482,9 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
        struct irq_desc *desc = irq_to_desc(irq);
        int ret = -ENXIO;
 
+       if (irq_desc_get_chip(desc)->flags &  IRQCHIP_SKIP_SET_WAKE)
+               return 0;
+
        if (desc->irq_data.chip->irq_set_wake)
                ret = desc->irq_data.chip->irq_set_wake(&desc->irq_data, on);
 
@@ -477,15 +505,16 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
  */
 int irq_set_irq_wake(unsigned int irq, unsigned int on)
 {
-       struct irq_desc *desc = irq_to_desc(irq);
        unsigned long flags;
+       struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
        int ret = 0;
 
+       if (!desc)
+               return -EINVAL;
+
        /* wakeup-capable irqs can be shared between drivers that
         * don't need to have the same sleep mode behaviors.
         */
-       chip_bus_lock(desc);
-       raw_spin_lock_irqsave(&desc->lock, flags);
        if (on) {
                if (desc->wake_depth++ == 0) {
                        ret = set_irq_wake_real(irq, on);
@@ -505,9 +534,7 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on)
                                irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE);
                }
        }
-
-       raw_spin_unlock_irqrestore(&desc->lock, flags);
-       chip_bus_sync_unlock(desc);
+       irq_put_desc_busunlock(desc, flags);
        return ret;
 }
 EXPORT_SYMBOL(irq_set_irq_wake);
@@ -519,25 +546,20 @@ EXPORT_SYMBOL(irq_set_irq_wake);
  */
 int can_request_irq(unsigned int irq, unsigned long irqflags)
 {
-       struct irq_desc *desc = irq_to_desc(irq);
-       struct irqaction *action;
        unsigned long flags;
+       struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
+       int canrequest = 0;
 
        if (!desc)
                return 0;
 
-       if (!irq_settings_can_request(desc))
-               return 0;
-
-       raw_spin_lock_irqsave(&desc->lock, flags);
-       action = desc->action;
-       if (action)
-               if (irqflags & action->flags & IRQF_SHARED)
-                       action = NULL;
-
-       raw_spin_unlock_irqrestore(&desc->lock, flags);
-
-       return !action;
+       if (irq_settings_can_request(desc)) {
+               if (desc->action)
+                       if (irqflags & desc->action->flags & IRQF_SHARED)
+                               canrequest =1;
+       }
+       irq_put_desc_unlock(desc, flags);
+       return canrequest;
 }
 
 int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
@@ -552,16 +574,16 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
                 * flow-types?
                 */
                pr_debug("No set_type function for IRQ %d (%s)\n", irq,
-                               chip ? (chip->name ? : "unknown") : "unknown");
+                        chip ? (chip->name ? : "unknown") : "unknown");
                return 0;
        }
 
        flags &= IRQ_TYPE_SENSE_MASK;
 
        if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
-               if (!(desc->istate & IRQS_MASKED))
+               if (!irqd_irq_masked(&desc->irq_data))
                        mask_irq(desc);
-               if (!(desc->istate & IRQS_DISABLED))
+               if (!irqd_irq_disabled(&desc->irq_data))
                        unmask = 1;
        }
 
@@ -583,11 +605,10 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
                        irqd_set(&desc->irq_data, IRQD_LEVEL);
                }
 
-               if (chip != desc->irq_data.chip)
-                       irq_chip_set_defaults(desc->irq_data.chip);
                ret = 0;
+               break;
        default:
-               pr_err("setting trigger mode %lu for irq %u failed (%pF)\n",
+               pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
                       flags, irq, chip->irq_set_type);
        }
        if (unmask)
@@ -617,8 +638,9 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id)
 
 static int irq_wait_for_interrupt(struct irqaction *action)
 {
+       set_current_state(TASK_INTERRUPTIBLE);
+
        while (!kthread_should_stop()) {
-               set_current_state(TASK_INTERRUPTIBLE);
 
                if (test_and_clear_bit(IRQTF_RUNTHREAD,
                                       &action->thread_flags)) {
@@ -626,7 +648,9 @@ static int irq_wait_for_interrupt(struct irqaction *action)
                        return 0;
                }
                schedule();
+               set_current_state(TASK_INTERRUPTIBLE);
        }
+       __set_current_state(TASK_RUNNING);
        return -1;
 }
 
@@ -635,8 +659,11 @@ static int irq_wait_for_interrupt(struct irqaction *action)
  * handler finished. unmask if the interrupt has not been disabled and
  * is marked MASKED.
  */
-static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
+static void irq_finalize_oneshot(struct irq_desc *desc,
+                                struct irqaction *action)
 {
+       if (!(desc->istate & IRQS_ONESHOT))
+               return;
 again:
        chip_bus_lock(desc);
        raw_spin_lock_irq(&desc->lock);
@@ -649,19 +676,34 @@ again:
         * on the other CPU. If we unmask the irq line then the
         * interrupt can come in again and masks the line, leaves due
         * to IRQS_INPROGRESS and the irq line is masked forever.
+        *
+        * This also serializes the state of shared oneshot handlers
+        * versus "desc->threads_oneshot |= action->thread_mask;" in
+        * irq_wake_thread(). See the comment there which explains the
+        * serialization.
         */
-       if (unlikely(desc->istate & IRQS_INPROGRESS)) {
+       if (unlikely(irqd_irq_inprogress(&desc->irq_data))) {
                raw_spin_unlock_irq(&desc->lock);
                chip_bus_sync_unlock(desc);
                cpu_relax();
                goto again;
        }
 
-       if (!(desc->istate & IRQS_DISABLED) && (desc->istate & IRQS_MASKED)) {
-               irq_compat_clr_masked(desc);
-               desc->istate &= ~IRQS_MASKED;
-               desc->irq_data.chip->irq_unmask(&desc->irq_data);
-       }
+       /*
+        * Now check again, whether the thread should run. Otherwise
+        * we would clear the threads_oneshot bit of this thread which
+        * was just set.
+        */
+       if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+               goto out_unlock;
+
+       desc->threads_oneshot &= ~action->thread_mask;
+
+       if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) &&
+           irqd_irq_masked(&desc->irq_data))
+               unmask_irq(desc);
+
+out_unlock:
        raw_spin_unlock_irq(&desc->lock);
        chip_bus_sync_unlock(desc);
 }
@@ -699,81 +741,137 @@ static inline void
 irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
 #endif
 
+/*
+ * Interrupts which are not explicitly requested as threaded
+ * interrupts rely on the implicit bh/preempt disable of the hard irq
+ * context. So we need to disable bh here to avoid deadlocks and other
+ * side effects.
+ */
+static irqreturn_t
+irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
+{
+       irqreturn_t ret;
+
+       local_bh_disable();
+       ret = action->thread_fn(action->irq, action->dev_id);
+       irq_finalize_oneshot(desc, action);
+       local_bh_enable();
+       return ret;
+}
+
+/*
+ * Interrupts explicitly requested as threaded interrupts want to be
+ * preemptible - many of them need to sleep and wait for slow buses to
+ * complete.
+ */
+static irqreturn_t irq_thread_fn(struct irq_desc *desc,
+               struct irqaction *action)
+{
+       irqreturn_t ret;
+
+       ret = action->thread_fn(action->irq, action->dev_id);
+       irq_finalize_oneshot(desc, action);
+       return ret;
+}
+
+static void wake_threads_waitq(struct irq_desc *desc)
+{
+       if (atomic_dec_and_test(&desc->threads_active) &&
+           waitqueue_active(&desc->wait_for_threads))
+               wake_up(&desc->wait_for_threads);
+}
+
+static void irq_thread_dtor(struct callback_head *unused)
+{
+       struct task_struct *tsk = current;
+       struct irq_desc *desc;
+       struct irqaction *action;
+
+       if (WARN_ON_ONCE(!(current->flags & PF_EXITING)))
+               return;
+
+       action = kthread_data(tsk);
+
+       pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+              tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
+
+
+       desc = irq_to_desc(action->irq);
+       /*
+        * If IRQTF_RUNTHREAD is set, we need to decrement
+        * desc->threads_active and wake possible waiters.
+        */
+       if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+               wake_threads_waitq(desc);
+
+       /* Prevent a stale desc->threads_oneshot */
+       irq_finalize_oneshot(desc, action);
+}
+
 /*
  * Interrupt handler thread
  */
 static int irq_thread(void *data)
 {
+       struct callback_head on_exit_work;
        static const struct sched_param param = {
                .sched_priority = MAX_USER_RT_PRIO/2,
        };
        struct irqaction *action = data;
        struct irq_desc *desc = irq_to_desc(action->irq);
-       int wake, oneshot = desc->istate & IRQS_ONESHOT;
+       irqreturn_t (*handler_fn)(struct irq_desc *desc,
+                       struct irqaction *action);
+
+       if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD,
+                                       &action->thread_flags))
+               handler_fn = irq_forced_thread_fn;
+       else
+               handler_fn = irq_thread_fn;
 
        sched_setscheduler(current, SCHED_FIFO, &param);
-       current->irqaction = action;
+
+       init_task_work(&on_exit_work, irq_thread_dtor);
+       task_work_add(current, &on_exit_work, false);
 
        while (!irq_wait_for_interrupt(action)) {
+               irqreturn_t action_ret;
 
                irq_thread_check_affinity(desc, action);
 
-               atomic_inc(&desc->threads_active);
-
-               raw_spin_lock_irq(&desc->lock);
-               if (unlikely(desc->istate & IRQS_DISABLED)) {
-                       /*
-                        * CHECKME: We might need a dedicated
-                        * IRQ_THREAD_PENDING flag here, which
-                        * retriggers the thread in check_irq_resend()
-                        * but AFAICT IRQS_PENDING should be fine as it
-                        * retriggers the interrupt itself --- tglx
-                        */
-                       irq_compat_set_pending(desc);
-                       desc->istate |= IRQS_PENDING;
-                       raw_spin_unlock_irq(&desc->lock);
-               } else {
-                       raw_spin_unlock_irq(&desc->lock);
-
-                       action->thread_fn(action->irq, action->dev_id);
-
-                       if (oneshot)
-                               irq_finalize_oneshot(action->irq, desc);
-               }
-
-               wake = atomic_dec_and_test(&desc->threads_active);
+               action_ret = handler_fn(desc, action);
+               if (!noirqdebug)
+                       note_interrupt(action->irq, desc, action_ret);
 
-               if (wake && waitqueue_active(&desc->wait_for_threads))
-                       wake_up(&desc->wait_for_threads);
+               wake_threads_waitq(desc);
        }
 
        /*
-        * Clear irqaction. Otherwise exit_irq_thread() would make
-        * fuzz about an active irq thread going into nirvana.
+        * This is the regular exit path. __free_irq() is stopping the
+        * thread via kthread_stop() after calling
+        * synchronize_irq(). So neither IRQTF_RUNTHREAD nor the
+        * oneshot mask bit can be set. We cannot verify that as we
+        * cannot touch the oneshot mask at this point anymore as
+        * __setup_irq() might have given out current's thread_mask
+        * again.
         */
-       current->irqaction = NULL;
+       task_work_cancel(current, irq_thread_dtor);
        return 0;
 }
 
-/*
- * Called from do_exit()
- */
-void exit_irq_thread(void)
+static void irq_setup_forced_threading(struct irqaction *new)
 {
-       struct task_struct *tsk = current;
-
-       if (!tsk->irqaction)
+       if (!force_irqthreads)
+               return;
+       if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
                return;
 
-       printk(KERN_ERR
-              "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
-              tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+       new->flags |= IRQF_ONESHOT;
 
-       /*
-        * Set the THREAD DIED flag to prevent further wakeups of the
-        * soon to be gone threaded handler.
-        */
-       set_bit(IRQTF_DIED, &tsk->irqaction->flags);
+       if (!new->thread_fn) {
+               set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
+               new->thread_fn = new->handler;
+               new->handler = irq_default_primary_handler;
+       }
 }
 
 /*
@@ -784,8 +882,7 @@ static int
 __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 {
        struct irqaction *old, **old_ptr;
-       const char *old_name = NULL;
-       unsigned long flags;
+       unsigned long flags, thread_mask = 0;
        int ret, nested, shared = 0;
        cpumask_var_t mask;
 
@@ -794,26 +891,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
        if (desc->irq_data.chip == &no_irq_chip)
                return -ENOSYS;
-       /*
-        * Some drivers like serial.c use request_irq() heavily,
-        * so we have to be careful not to interfere with a
-        * running system.
-        */
-       if (new->flags & IRQF_SAMPLE_RANDOM) {
-               /*
-                * This function might sleep, we want to call it first,
-                * outside of the atomic block.
-                * Yes, this might clear the entropy pool if the wrong
-                * driver is attempted to be loaded, without actually
-                * installing a new handler, but is this really a problem,
-                * only the sysadmin is able to do this.
-                */
-               rand_initialize_irq(irq);
-       }
-
-       /* Oneshot interrupts are not allowed with shared */
-       if ((new->flags & IRQF_ONESHOT) && (new->flags & IRQF_SHARED))
-               return -EINVAL;
+       if (!try_module_get(desc->owner))
+               return -ENODEV;
 
        /*
         * Check whether the interrupt nests into another interrupt
@@ -821,14 +900,19 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
         */
        nested = irq_settings_is_nested_thread(desc);
        if (nested) {
-               if (!new->thread_fn)
-                       return -EINVAL;
+               if (!new->thread_fn) {
+                       ret = -EINVAL;
+                       goto out_mput;
+               }
                /*
                 * Replace the primary handler which was provided from
                 * the driver for non nested interrupt handling by the
                 * dummy function which warns when called.
                 */
                new->handler = irq_nested_primary_handler;
+       } else {
+               if (irq_settings_can_thread(desc))
+                       irq_setup_forced_threading(new);
        }
 
        /*
@@ -841,8 +925,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
                t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
                                   new->name);
-               if (IS_ERR(t))
-                       return PTR_ERR(t);
+               if (IS_ERR(t)) {
+                       ret = PTR_ERR(t);
+                       goto out_mput;
+               }
                /*
                 * We keep the reference to the task struct even if
                 * the thread dies to avoid that the interrupt code
@@ -857,6 +943,18 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
                goto out_thread;
        }
 
+       /*
+        * Drivers are often written to work w/o knowledge about the
+        * underlying irq chip implementation, so a request for a
+        * threaded irq without a primary hard irq context handler
+        * requires the ONESHOT flag to be set. Some irq chips like
+        * MSI based interrupts are per se one shot safe. Check the
+        * chip flags, so we can avoid the unmask dance at the end of
+        * the threaded handler for those.
+        */
+       if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)
+               new->flags &= ~IRQF_ONESHOT;
+
        /*
         * The following block of code has to be executed atomically
         */
@@ -868,13 +966,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
                 * Can't share interrupts unless both agree to and are
                 * the same type (level, edge, polarity). So both flag
                 * fields must have IRQF_SHARED set and the bits which
-                * set the trigger type must match.
+                * set the trigger type must match. Also all must
+                * agree on ONESHOT.
                 */
                if (!((old->flags & new->flags) & IRQF_SHARED) ||
-                   ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) {
-                       old_name = old->name;
+                   ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK) ||
+                   ((old->flags ^ new->flags) & IRQF_ONESHOT))
                        goto mismatch;
-               }
 
                /* All handlers must agree on per-cpuness */
                if ((old->flags & IRQF_PERCPU) !=
@@ -883,15 +981,78 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
                /* add new interrupt at end of irq queue */
                do {
+                       /*
+                        * Or all existing action->thread_mask bits,
+                        * so we can find the next zero bit for this
+                        * new action.
+                        */
+                       thread_mask |= old->thread_mask;
                        old_ptr = &old->next;
                        old = *old_ptr;
                } while (old);
                shared = 1;
        }
 
-       if (!shared) {
-               irq_chip_set_defaults(desc->irq_data.chip);
+       /*
+        * Setup the thread mask for this irqaction for ONESHOT. For
+        * !ONESHOT irqs the thread mask is 0 so we can avoid a
+        * conditional in irq_wake_thread().
+        */
+       if (new->flags & IRQF_ONESHOT) {
+               /*
+                * Unlikely to have 32 resp 64 irqs sharing one line,
+                * but who knows.
+                */
+               if (thread_mask == ~0UL) {
+                       ret = -EBUSY;
+                       goto out_mask;
+               }
+               /*
+                * The thread_mask for the action is or'ed to
+                * desc->threads_oneshot to indicate that the
+                * IRQF_ONESHOT thread handler has been woken, but not
+                * yet finished. The bit is cleared when a thread
+                * completes. When all threads of a shared interrupt
+                * line have completed desc->threads_oneshot becomes
+                * zero and the interrupt line is unmasked. See
+                * handle.c:irq_wake_thread() for further information.
+                *
+                * If no thread is woken by primary (hard irq context)
+                * interrupt handlers, then desc->threads_oneshot is
+                * also checked for zero to unmask the irq line in the
+                * affected hard irq flow handlers
+                * (handle_[fasteoi|level]_irq).
+                *
+                * The new action gets the first zero bit of
+                * thread_mask assigned. See the loop above which or's
+                * all existing action->thread_mask bits.
+                */
+               new->thread_mask = 1 << ffz(thread_mask);
 
+       } else if (new->handler == irq_default_primary_handler &&
+                  !(desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)) {
+               /*
+                * The interrupt was requested with handler = NULL, so
+                * we use the default primary handler for it. But it
+                * does not have the oneshot flag set. In combination
+                * with level interrupts this is deadly, because the
+                * default primary handler just wakes the thread, then
+                * the irq line is reenabled, but the device still
+                * has the level irq asserted. Rinse and repeat....
+                *
+                * While this works for edge type interrupts, we play
+                * it safe and reject unconditionally because we can't
+                * say for sure which type this interrupt really
+                * has. The type flags are unreliable as the
+                * underlying chip implementation can override them.
+                */
+               pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
+                      irq);
+               ret = -EINVAL;
+               goto out_mask;
+       }
+
+       if (!shared) {
                init_waitqueue_head(&desc->wait_for_threads);
 
                /* Setup the type (level, edge polarity) if configured: */
@@ -904,8 +1065,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
                }
 
                desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
-                                 IRQS_INPROGRESS | IRQS_ONESHOT | \
-                                 IRQS_WAITING);
+                                 IRQS_ONESHOT | IRQS_WAITING);
+               irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
 
                if (new->flags & IRQF_PERCPU) {
                        irqd_set(&desc->irq_data, IRQD_PER_CPU);
@@ -916,7 +1077,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
                        desc->istate |= IRQS_ONESHOT;
 
                if (irq_settings_can_autoenable(desc))
-                       irq_startup(desc);
+                       irq_startup(desc, true);
                else
                        /* Undo nested disables: */
                        desc->depth = 1;
@@ -936,7 +1097,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
                if (nmsk != omsk)
                        /* hope the handler works with current  trigger mode */
-                       pr_warning("IRQ %d uses trigger mode %u; requested %u\n",
+                       pr_warning("irq %d uses trigger mode %u; requested %u\n",
                                   irq, nmsk, omsk);
        }
 
@@ -968,33 +1129,34 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
        register_irq_proc(irq, desc);
        new->dir = NULL;
        register_handler_proc(irq, new);
+       free_cpumask_var(mask);
 
        return 0;
 
 mismatch:
-#ifdef CONFIG_DEBUG_SHIRQ
        if (!(new->flags & IRQF_PROBE_SHARED)) {
-               printk(KERN_ERR "IRQ handler type mismatch for IRQ %d\n", irq);
-               if (old_name)
-                       printk(KERN_ERR "current handler: %s\n", old_name);
+               pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
+                      irq, new->flags, new->name, old->flags, old->name);
+#ifdef CONFIG_DEBUG_SHIRQ
                dump_stack();
-       }
 #endif
+       }
        ret = -EBUSY;
 
 out_mask:
+       raw_spin_unlock_irqrestore(&desc->lock, flags);
        free_cpumask_var(mask);
 
 out_thread:
-       raw_spin_unlock_irqrestore(&desc->lock, flags);
        if (new->thread) {
                struct task_struct *t = new->thread;
 
                new->thread = NULL;
-               if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
-                       kthread_stop(t);
+               kthread_stop(t);
                put_task_struct(t);
        }
+out_mput:
+       module_put(desc->owner);
        return ret;
 }
 
@@ -1010,6 +1172,8 @@ int setup_irq(unsigned int irq, struct irqaction *act)
        int retval;
        struct irq_desc *desc = irq_to_desc(irq);
 
+       if (WARN_ON(irq_settings_is_per_cpu_devid(desc)))
+               return -EINVAL;
        chip_bus_lock(desc);
        retval = __setup_irq(irq, desc, act);
        chip_bus_sync_unlock(desc);
@@ -1018,7 +1182,7 @@ int setup_irq(unsigned int irq, struct irqaction *act)
 }
 EXPORT_SYMBOL_GPL(setup_irq);
 
- /*
+/*
  * Internal function to unregister an irqaction - used to free
  * regular and special interrupts that are part of the architecture.
  */
@@ -1058,12 +1222,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
        /* Found it - now remove it from the list of entries: */
        *action_ptr = action->next;
 
-       /* Currently used only by UML, might disappear one day: */
-#ifdef CONFIG_IRQ_RELEASE_METHOD
-       if (desc->irq_data.chip->release)
-               desc->irq_data.chip->release(irq, dev_id);
-#endif
-
        /* If this was the last handler, shut down the IRQ line: */
        if (!desc->action)
                irq_shutdown(desc);
@@ -1098,11 +1256,11 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 #endif
 
        if (action->thread) {
-               if (!test_bit(IRQTF_DIED, &action->thread_flags))
-                       kthread_stop(action->thread);
+               kthread_stop(action->thread);
                put_task_struct(action->thread);
        }
 
+       module_put(desc->owner);
        return action;
 }
 
@@ -1115,7 +1273,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
  */
 void remove_irq(unsigned int irq, struct irqaction *act)
 {
-       __free_irq(irq, act->dev_id);
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc)))
+           __free_irq(irq, act->dev_id);
 }
 EXPORT_SYMBOL_GPL(remove_irq);
 
@@ -1137,7 +1298,7 @@ void free_irq(unsigned int irq, void *dev_id)
 {
        struct irq_desc *desc = irq_to_desc(irq);
 
-       if (!desc)
+       if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc)))
                return;
 
 #ifdef CONFIG_SMP
@@ -1172,7 +1333,7 @@ EXPORT_SYMBOL(free_irq);
  *     and to set up the interrupt handler in the right order.
  *
  *     If you want to set up a threaded irq handler for your device
- *     then you need to supply @handler and @thread_fn. @handler ist
+ *     then you need to supply @handler and @thread_fn. @handler is
  *     still called in hard interrupt context and has to check
  *     whether the interrupt originates from the device. If yes it
  *     needs to disable the interrupt on the device and return
@@ -1190,7 +1351,6 @@ EXPORT_SYMBOL(free_irq);
  *     Flags:
  *
  *     IRQF_SHARED             Interrupt is shared
- *     IRQF_SAMPLE_RANDOM      The interrupt can be used for entropy
  *     IRQF_TRIGGER_*          Specify active edge(s) or level
  *
  */
@@ -1215,7 +1375,8 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
        if (!desc)
                return -EINVAL;
 
-       if (!irq_settings_can_request(desc))
+       if (!irq_settings_can_request(desc) ||
+           WARN_ON(irq_settings_is_per_cpu_devid(desc)))
                return -EINVAL;
 
        if (!handler) {
@@ -1300,3 +1461,194 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
        return !ret ? IRQC_IS_HARDIRQ : ret;
 }
 EXPORT_SYMBOL_GPL(request_any_context_irq);
+
+void enable_percpu_irq(unsigned int irq, unsigned int type)
+{
+       unsigned int cpu = smp_processor_id();
+       unsigned long flags;
+       struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU);
+
+       if (!desc)
+               return;
+
+       type &= IRQ_TYPE_SENSE_MASK;
+       if (type != IRQ_TYPE_NONE) {
+               int ret;
+
+               ret = __irq_set_trigger(desc, irq, type);
+
+               if (ret) {
+                       WARN(1, "failed to set type for IRQ%d\n", irq);
+                       goto out;
+               }
+       }
+
+       irq_percpu_enable(desc, cpu);
+out:
+       irq_put_desc_unlock(desc, flags);
+}
+
+void disable_percpu_irq(unsigned int irq)
+{
+       unsigned int cpu = smp_processor_id();
+       unsigned long flags;
+       struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU);
+
+       if (!desc)
+               return;
+
+       irq_percpu_disable(desc, cpu);
+       irq_put_desc_unlock(desc, flags);
+}
+
+/*
+ * Internal function to unregister a percpu irqaction.
+ */
+static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irqaction *action;
+       unsigned long flags;
+
+       WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
+
+       if (!desc)
+               return NULL;
+
+       raw_spin_lock_irqsave(&desc->lock, flags);
+
+       action = desc->action;
+       if (!action || action->percpu_dev_id != dev_id) {
+               WARN(1, "Trying to free already-free IRQ %d\n", irq);
+               goto bad;
+       }
+
+       if (!cpumask_empty(desc->percpu_enabled)) {
+               WARN(1, "percpu IRQ %d still enabled on CPU%d!\n",
+                    irq, cpumask_first(desc->percpu_enabled));
+               goto bad;
+       }
+
+       /* Found it - now remove it from the list of entries: */
+       desc->action = NULL;
+
+       raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+       unregister_handler_proc(irq, action);
+
+       module_put(desc->owner);
+       return action;
+
+bad:
+       raw_spin_unlock_irqrestore(&desc->lock, flags);
+       return NULL;
+}
+
+/**
+ *     remove_percpu_irq - free a per-cpu interrupt
+ *     @irq: Interrupt line to free
+ *     @act: irqaction for the interrupt
+ *
+ * Used to remove interrupts statically set up by the early boot process.
+ */
+void remove_percpu_irq(unsigned int irq, struct irqaction *act)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (desc && irq_settings_is_per_cpu_devid(desc))
+           __free_percpu_irq(irq, act->percpu_dev_id);
+}
+
+/**
+ *     free_percpu_irq - free an interrupt allocated with request_percpu_irq
+ *     @irq: Interrupt line to free
+ *     @dev_id: Device identity to free
+ *
+ *     Remove a percpu interrupt handler. The handler is removed, but
+ *     the interrupt line is not disabled. This must be done on each
+ *     CPU before calling this function. The function does not return
+ *     until any executing interrupts for this IRQ have completed.
+ *
+ *     This function must not be called from interrupt context.
+ */
+void free_percpu_irq(unsigned int irq, void __percpu *dev_id)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (!desc || !irq_settings_is_per_cpu_devid(desc))
+               return;
+
+       chip_bus_lock(desc);
+       kfree(__free_percpu_irq(irq, dev_id));
+       chip_bus_sync_unlock(desc);
+}
+
+/**
+ *     setup_percpu_irq - set up a per-cpu interrupt
+ *     @irq: Interrupt line to set up
+ *     @act: irqaction for the interrupt
+ *
+ * Used to statically set up per-cpu interrupts in the early boot process.
+ */
+int setup_percpu_irq(unsigned int irq, struct irqaction *act)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       int retval;
+
+       if (!desc || !irq_settings_is_per_cpu_devid(desc))
+               return -EINVAL;
+       chip_bus_lock(desc);
+       retval = __setup_irq(irq, desc, act);
+       chip_bus_sync_unlock(desc);
+
+       return retval;
+}
+
+/**
+ *     request_percpu_irq - allocate a percpu interrupt line
+ *     @irq: Interrupt line to allocate
+ *     @handler: Function to be called when the IRQ occurs.
+ *     @devname: An ascii name for the claiming device
+ *     @dev_id: A percpu cookie passed back to the handler function
+ *
+ *     This call allocates interrupt resources, but doesn't
+ *     automatically enable the interrupt. Enabling has to be done
+ *     on each CPU using enable_percpu_irq().
+ *
+ *     Dev_id must be globally unique. It is a per-cpu variable, and
+ *     the handler gets called with the interrupted CPU's instance of
+ *     that variable.
+ */
+int request_percpu_irq(unsigned int irq, irq_handler_t handler,
+                      const char *devname, void __percpu *dev_id)
+{
+       struct irqaction *action;
+       struct irq_desc *desc;
+       int retval;
+
+       if (!dev_id)
+               return -EINVAL;
+
+       desc = irq_to_desc(irq);
+       if (!desc || !irq_settings_can_request(desc) ||
+           !irq_settings_is_per_cpu_devid(desc))
+               return -EINVAL;
+
+       action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
+       if (!action)
+               return -ENOMEM;
+
+       action->handler = handler;
+       action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND;
+       action->name = devname;
+       action->percpu_dev_id = dev_id;
+
+       chip_bus_lock(desc);
+       retval = __setup_irq(irq, desc, action);
+       chip_bus_sync_unlock(desc);
+
+       if (retval)
+               kfree(action);
+
+       return retval;
+}