Blackfin: SMP: rewrite IPI handling to avoid memory allocation
Yi Li [Thu, 17 Dec 2009 08:20:32 +0000 (08:20 +0000)]
Currently, sending an interprocessor interrupt (IPI) requires building up
a message dynamically, which means memory allocation.  But we often
want to send an IPI from low-level contexts where allocation is not
possible, which may lead to a panic().  So create a per-cpu static array
for the message queue and use that instead.

Further, while we have two supplemental interrupts, we are currently only
using one of them.  So dedicate one (IRQ_SUPPLE_0) to the most common IPI
message of all -- smp_send_reschedule() -- and move the queued messages to
the other (IRQ_SUPPLE_1).  This avoids ugly contention for locks which in
turn would require an IPI message ...

In general, this improves SMP performance, and in some cases allows the
SMP port to work in places it wouldn't before, such as the PREEMPT_RT
state where the slab is protected by a per-cpu spin lock.  If the slab
kmalloc/kfree were to put the task to sleep, and that task was actually
the IPI handler, then the system would fall down yet again.

After running various stress tests on the system, the static limit
of 5 messages per per-cpu queue (BFIN_IPI_MSGQ_LEN) seems to work.  On
the off chance even this overflows, we simply panic(), and we can review
that scenario to see if the limit needs to be increased a bit more.

Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>

arch/blackfin/mach-bf561/include/mach/smp.h
arch/blackfin/mach-bf561/smp.c
arch/blackfin/mach-common/smp.c

index 70cafb9..346c605 100644 (file)
@@ -19,13 +19,13 @@ int platform_boot_secondary(unsigned int cpu, struct task_struct *idle);
 
 void platform_secondary_init(unsigned int cpu);
 
-void platform_request_ipi(/*irq_handler_t*/ void *handler);
+void platform_request_ipi(int irq, /*irq_handler_t*/ void *handler);
 
-void platform_send_ipi(cpumask_t callmap);
+void platform_send_ipi(cpumask_t callmap, int irq);
 
-void platform_send_ipi_cpu(unsigned int cpu);
+void platform_send_ipi_cpu(unsigned int cpu, int irq);
 
-void platform_clear_ipi(unsigned int cpu);
+void platform_clear_ipi(unsigned int cpu, int irq);
 
 void bfin_local_timer_setup(void);
 
index 1a19fad..1074a7e 100644 (file)
@@ -111,41 +111,46 @@ int __cpuinit platform_boot_secondary(unsigned int cpu, struct task_struct *idle
                panic("CPU%u: processor failed to boot\n", cpu);
 }
 
-void __init platform_request_ipi(void *handler)
+static const char supple0[] = "IRQ_SUPPLE_0";
+static const char supple1[] = "IRQ_SUPPLE_1";
+void __init platform_request_ipi(int irq, void *handler)
 {
        int ret;
+       const char *name = (irq == IRQ_SUPPLE_0) ? supple0 : supple1;
 
-       ret = request_irq(IRQ_SUPPLE_0, handler, IRQF_DISABLED,
-                         "Supplemental Interrupt0", handler);
+       ret = request_irq(irq, handler, IRQF_DISABLED | IRQF_PERCPU, name, handler);
        if (ret)
-               panic("Cannot request supplemental interrupt 0 for IPI service");
+               panic("Cannot request %s for IPI service", name);
 }
 
-void platform_send_ipi(cpumask_t callmap)
+void platform_send_ipi(cpumask_t callmap, int irq)
 {
        unsigned int cpu;
+       int offset = (irq == IRQ_SUPPLE_0) ? 6 : 8;
 
        for_each_cpu_mask(cpu, callmap) {
                BUG_ON(cpu >= 2);
                SSYNC();
-               bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (6 + cpu)));
+               bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (offset + cpu)));
                SSYNC();
        }
 }
 
-void platform_send_ipi_cpu(unsigned int cpu)
+void platform_send_ipi_cpu(unsigned int cpu, int irq)
 {
+       int offset = (irq == IRQ_SUPPLE_0) ? 6 : 8;
        BUG_ON(cpu >= 2);
        SSYNC();
-       bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (6 + cpu)));
+       bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (offset + cpu)));
        SSYNC();
 }
 
-void platform_clear_ipi(unsigned int cpu)
+void platform_clear_ipi(unsigned int cpu, int irq)
 {
+       int offset = (irq == IRQ_SUPPLE_0) ? 10 : 12;
        BUG_ON(cpu >= 2);
        SSYNC();
-       bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (10 + cpu)));
+       bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (offset + cpu)));
        SSYNC();
 }
 
index 930608d..9f25140 100644 (file)
@@ -60,8 +60,7 @@ struct smp_call_struct {
        void (*func)(void *info);
        void *info;
        int wait;
-       cpumask_t pending;
-       cpumask_t waitmask;
+       cpumask_t *waitmask;
 };
 
 static struct blackfin_flush_data smp_flush_data;
@@ -69,15 +68,19 @@ static struct blackfin_flush_data smp_flush_data;
 static DEFINE_SPINLOCK(stop_lock);
 
 struct ipi_message {
-       struct list_head list;
        unsigned long type;
        struct smp_call_struct call_struct;
 };
 
+/* A magic number - stress test shows this is safe for common cases */
+#define BFIN_IPI_MSGQ_LEN 5
+
+/* Simple FIFO buffer, overflow leads to panic */
 struct ipi_message_queue {
-       struct list_head head;
        spinlock_t lock;
        unsigned long count;
+       unsigned long head; /* head of the queue */
+       struct ipi_message ipi_message[BFIN_IPI_MSGQ_LEN];
 };
 
 static DEFINE_PER_CPU(struct ipi_message_queue, ipi_msg_queue);
@@ -116,7 +119,6 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg)
        func = msg->call_struct.func;
        info = msg->call_struct.info;
        wait = msg->call_struct.wait;
-       cpu_clear(cpu, msg->call_struct.pending);
        func(info);
        if (wait) {
 #ifdef __ARCH_SYNC_CORE_DCACHE
@@ -127,51 +129,57 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg)
                 */
                resync_core_dcache();
 #endif
-               cpu_clear(cpu, msg->call_struct.waitmask);
-       } else
-               kfree(msg);
+               cpu_clear(cpu, *msg->call_struct.waitmask);
+       }
 }
 
-static irqreturn_t ipi_handler(int irq, void *dev_instance)
+/* Use IRQ_SUPPLE_0 to request reschedule.
+ * When returning from interrupt to user space,
+ * there is chance to reschedule */
+static irqreturn_t ipi_handler_int0(int irq, void *dev_instance)
+{
+       unsigned int cpu = smp_processor_id();
+
+       platform_clear_ipi(cpu, IRQ_SUPPLE_0);
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
 {
        struct ipi_message *msg;
        struct ipi_message_queue *msg_queue;
        unsigned int cpu = smp_processor_id();
+       unsigned long flags;
 
-       platform_clear_ipi(cpu);
+       platform_clear_ipi(cpu, IRQ_SUPPLE_1);
 
        msg_queue = &__get_cpu_var(ipi_msg_queue);
-       msg_queue->count++;
 
-       spin_lock(&msg_queue->lock);
-       while (!list_empty(&msg_queue->head)) {
-               msg = list_entry(msg_queue->head.next, typeof(*msg), list);
-               list_del(&msg->list);
+       spin_lock_irqsave(&msg_queue->lock, flags);
+
+       while (msg_queue->count) {
+               msg = &msg_queue->ipi_message[msg_queue->head];
                switch (msg->type) {
-               case BFIN_IPI_RESCHEDULE:
-                       /* That's the easiest one; leave it to
-                        * return_from_int. */
-                       kfree(msg);
-                       break;
                case BFIN_IPI_CALL_FUNC:
-                       spin_unlock(&msg_queue->lock);
+                       spin_unlock_irqrestore(&msg_queue->lock, flags);
                        ipi_call_function(cpu, msg);
-                       spin_lock(&msg_queue->lock);
+                       spin_lock_irqsave(&msg_queue->lock, flags);
                        break;
                case BFIN_IPI_CPU_STOP:
-                       spin_unlock(&msg_queue->lock);
+                       spin_unlock_irqrestore(&msg_queue->lock, flags);
                        ipi_cpu_stop(cpu);
-                       spin_lock(&msg_queue->lock);
-                       kfree(msg);
+                       spin_lock_irqsave(&msg_queue->lock, flags);
                        break;
                default:
                        printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n",
                               cpu, msg->type);
-                       kfree(msg);
                        break;
                }
+               msg_queue->head++;
+               msg_queue->head %= BFIN_IPI_MSGQ_LEN;
+               msg_queue->count--;
        }
-       spin_unlock(&msg_queue->lock);
+       spin_unlock_irqrestore(&msg_queue->lock, flags);
        return IRQ_HANDLED;
 }
 
@@ -181,48 +189,47 @@ static void ipi_queue_init(void)
        struct ipi_message_queue *msg_queue;
        for_each_possible_cpu(cpu) {
                msg_queue = &per_cpu(ipi_msg_queue, cpu);
-               INIT_LIST_HEAD(&msg_queue->head);
                spin_lock_init(&msg_queue->lock);
                msg_queue->count = 0;
+               msg_queue->head = 0;
        }
 }
 
-int smp_call_function(void (*func)(void *info), void *info, int wait)
+static inline void smp_send_message(cpumask_t callmap, unsigned long type,
+                                       void (*func) (void *info), void *info, int wait)
 {
        unsigned int cpu;
-       cpumask_t callmap;
-       unsigned long flags;
        struct ipi_message_queue *msg_queue;
        struct ipi_message *msg;
-
-       callmap = cpu_online_map;
-       cpu_clear(smp_processor_id(), callmap);
-       if (cpus_empty(callmap))
-               return 0;
-
-       msg = kmalloc(sizeof(*msg), GFP_ATOMIC);
-       if (!msg)
-               return -ENOMEM;
-       INIT_LIST_HEAD(&msg->list);
-       msg->call_struct.func = func;
-       msg->call_struct.info = info;
-       msg->call_struct.wait = wait;
-       msg->call_struct.pending = callmap;
-       msg->call_struct.waitmask = callmap;
-       msg->type = BFIN_IPI_CALL_FUNC;
+       unsigned long flags, next_msg;
+       cpumask_t waitmask = callmap; /* waitmask is shared by all cpus */
 
        for_each_cpu_mask(cpu, callmap) {
                msg_queue = &per_cpu(ipi_msg_queue, cpu);
                spin_lock_irqsave(&msg_queue->lock, flags);
-               list_add_tail(&msg->list, &msg_queue->head);
+               if (msg_queue->count < BFIN_IPI_MSGQ_LEN) {
+                       next_msg = (msg_queue->head + msg_queue->count)
+                                       % BFIN_IPI_MSGQ_LEN;
+                       msg = &msg_queue->ipi_message[next_msg];
+                       msg->type = type;
+                       if (type == BFIN_IPI_CALL_FUNC) {
+                               msg->call_struct.func = func;
+                               msg->call_struct.info = info;
+                               msg->call_struct.wait = wait;
+                               msg->call_struct.waitmask = &waitmask;
+                       }
+                       msg_queue->count++;
+               } else
+                       panic("IPI message queue overflow\n");
                spin_unlock_irqrestore(&msg_queue->lock, flags);
-               platform_send_ipi_cpu(cpu);
+               platform_send_ipi_cpu(cpu, IRQ_SUPPLE_1);
        }
+
        if (wait) {
-               while (!cpus_empty(msg->call_struct.waitmask))
+               while (!cpus_empty(waitmask))
                        blackfin_dcache_invalidate_range(
-                               (unsigned long)(&msg->call_struct.waitmask),
-                               (unsigned long)(&msg->call_struct.waitmask));
+                               (unsigned long)(&waitmask),
+                               (unsigned long)(&waitmask));
 #ifdef __ARCH_SYNC_CORE_DCACHE
                /*
                 * Invalidate D cache in case shared data was changed by
@@ -230,8 +237,20 @@ int smp_call_function(void (*func)(void *info), void *info, int wait)
                 */
                resync_core_dcache();
 #endif
-               kfree(msg);
        }
+}
+
+int smp_call_function(void (*func)(void *info), void *info, int wait)
+{
+       cpumask_t callmap;
+
+       callmap = cpu_online_map;
+       cpu_clear(smp_processor_id(), callmap);
+       if (cpus_empty(callmap))
+               return 0;
+
+       smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(smp_call_function);
@@ -241,100 +260,39 @@ int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
 {
        unsigned int cpu = cpuid;
        cpumask_t callmap;
-       unsigned long flags;
-       struct ipi_message_queue *msg_queue;
-       struct ipi_message *msg;
 
        if (cpu_is_offline(cpu))
                return 0;
        cpus_clear(callmap);
        cpu_set(cpu, callmap);
 
-       msg = kmalloc(sizeof(*msg), GFP_ATOMIC);
-       if (!msg)
-               return -ENOMEM;
-       INIT_LIST_HEAD(&msg->list);
-       msg->call_struct.func = func;
-       msg->call_struct.info = info;
-       msg->call_struct.wait = wait;
-       msg->call_struct.pending = callmap;
-       msg->call_struct.waitmask = callmap;
-       msg->type = BFIN_IPI_CALL_FUNC;
-
-       msg_queue = &per_cpu(ipi_msg_queue, cpu);
-       spin_lock_irqsave(&msg_queue->lock, flags);
-       list_add_tail(&msg->list, &msg_queue->head);
-       spin_unlock_irqrestore(&msg_queue->lock, flags);
-       platform_send_ipi_cpu(cpu);
+       smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
 
-       if (wait) {
-               while (!cpus_empty(msg->call_struct.waitmask))
-                       blackfin_dcache_invalidate_range(
-                               (unsigned long)(&msg->call_struct.waitmask),
-                               (unsigned long)(&msg->call_struct.waitmask));
-#ifdef __ARCH_SYNC_CORE_DCACHE
-               /*
-                * Invalidate D cache in case shared data was changed by
-                * other processors to ensure cache coherence.
-                */
-               resync_core_dcache();
-#endif
-               kfree(msg);
-       }
        return 0;
 }
 EXPORT_SYMBOL_GPL(smp_call_function_single);
 
 void smp_send_reschedule(int cpu)
 {
-       unsigned long flags;
-       struct ipi_message_queue *msg_queue;
-       struct ipi_message *msg;
-
+       /* simply trigger an ipi */
        if (cpu_is_offline(cpu))
                return;
-
-       msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
-       if (!msg)
-               return;
-       INIT_LIST_HEAD(&msg->list);
-       msg->type = BFIN_IPI_RESCHEDULE;
-
-       msg_queue = &per_cpu(ipi_msg_queue, cpu);
-       spin_lock_irqsave(&msg_queue->lock, flags);
-       list_add_tail(&msg->list, &msg_queue->head);
-       spin_unlock_irqrestore(&msg_queue->lock, flags);
-       platform_send_ipi_cpu(cpu);
+       platform_send_ipi_cpu(cpu, IRQ_SUPPLE_0);
 
        return;
 }
 
 void smp_send_stop(void)
 {
-       unsigned int cpu;
        cpumask_t callmap;
-       unsigned long flags;
-       struct ipi_message_queue *msg_queue;
-       struct ipi_message *msg;
 
        callmap = cpu_online_map;
        cpu_clear(smp_processor_id(), callmap);
        if (cpus_empty(callmap))
                return;
 
-       msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
-       if (!msg)
-               return;
-       INIT_LIST_HEAD(&msg->list);
-       msg->type = BFIN_IPI_CPU_STOP;
+       smp_send_message(callmap, BFIN_IPI_CPU_STOP, NULL, NULL, 0);
 
-       for_each_cpu_mask(cpu, callmap) {
-               msg_queue = &per_cpu(ipi_msg_queue, cpu);
-               spin_lock_irqsave(&msg_queue->lock, flags);
-               list_add_tail(&msg->list, &msg_queue->head);
-               spin_unlock_irqrestore(&msg_queue->lock, flags);
-               platform_send_ipi_cpu(cpu);
-       }
        return;
 }
 
@@ -441,7 +399,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 {
        platform_prepare_cpus(max_cpus);
        ipi_queue_init();
-       platform_request_ipi(ipi_handler);
+       platform_request_ipi(IRQ_SUPPLE_0, ipi_handler_int0);
+       platform_request_ipi(IRQ_SUPPLE_1, ipi_handler_int1);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)