atomic: use <linux/atomic.h>
[linux-2.6.git] / arch / powerpc / kernel / smp.c
index 924d692..7bf2187 100644 (file)
@@ -33,7 +33,7 @@
 #include <linux/topology.h>
 
 #include <asm/ptrace.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/irq.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -41,6 +41,7 @@
 #include <asm/smp.h>
 #include <asm/time.h>
 #include <asm/machdep.h>
+#include <asm/cputhreads.h>
 #include <asm/cputable.h>
 #include <asm/system.h>
 #include <asm/mpic.h>
 #define DBG(fmt...)
 #endif
 
-int smp_hw_index[NR_CPUS];
+
+/*
+ * Remember the idle thread of every CPU so that it can be reused
+ * instead of creating a new one. This also avoids complicated
+ * thread-destroy functionality for idle threads.
+ *
+ * get_idle_for_cpu(x) reads and set_idle_for_cpu(x, p) writes the
+ * slot for CPU x; both storage variants below provide the same pair.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Persistent per-CPU storage is needed only for CONFIG_HOTPLUG_CPU:
+ * the __cpuinitdata array used otherwise is removed after init for
+ * !CONFIG_HOTPLUG_CPU.
+ */
+static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
+#define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
+#define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
+#else
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+#define get_idle_for_cpu(x)      (idle_thread_array[(x)])
+#define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
+#endif
+
 struct thread_info *secondary_ti;
 
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
 
-EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_possible_map);
-EXPORT_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
 
-static volatile unsigned int cpu_callin_map[NR_CPUS];
-
-void smp_call_function_interrupt(void);
+/* Can't be static due to PowerMac hackery */
+volatile unsigned int cpu_callin_map[NR_CPUS];
 
 int smt_enabled_at_boot = 1;
 
 static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
 
 #ifdef CONFIG_PPC64
-void __devinit smp_generic_kick_cpu(int nr)
+int __devinit smp_generic_kick_cpu(int nr)
 {
        BUG_ON(nr < 0 || nr >= NR_CPUS);
 
@@ -90,238 +106,210 @@ void __devinit smp_generic_kick_cpu(int nr)
         */
        paca[nr].cpu_start = 1;
        smp_mb();
+
+       return 0;
 }
 #endif
 
-void smp_message_recv(int msg)
-{
-       switch(msg) {
-       case PPC_MSG_CALL_FUNCTION:
-               smp_call_function_interrupt();
-               break;
-       case PPC_MSG_RESCHEDULE:
-               /* XXX Do we have to do this? */
-               set_need_resched();
-               break;
-       case PPC_MSG_DEBUGGER_BREAK:
-               if (crash_ipi_function_ptr) {
-                       crash_ipi_function_ptr(get_irq_regs());
-                       break;
-               }
-#ifdef CONFIG_DEBUGGER
-               debugger_ipi(get_irq_regs());
-               break;
-#endif /* CONFIG_DEBUGGER */
-               /* FALLTHROUGH */
-       default:
-               printk("SMP %d: smp_message_recv(): unknown msg %d\n",
-                      smp_processor_id(), msg);
-               break;
-       }
+static irqreturn_t call_function_action(int irq, void *data)
+{
+       generic_smp_call_function_interrupt();
+       return IRQ_HANDLED;
 }
 
-void smp_send_reschedule(int cpu)
+static irqreturn_t reschedule_action(int irq, void *data)
 {
-       if (likely(smp_ops))
-               smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
+       scheduler_ipi();
+       return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_DEBUGGER
-void smp_send_debugger_break(int cpu)
+static irqreturn_t call_function_single_action(int irq, void *data)
 {
-       if (likely(smp_ops))
-               smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+       generic_smp_call_function_single_interrupt();
+       return IRQ_HANDLED;
 }
-#endif
 
-#ifdef CONFIG_KEXEC
-void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+static irqreturn_t debug_ipi_action(int irq, void *data)
 {
-       crash_ipi_function_ptr = crash_ipi_callback;
-       if (crash_ipi_callback && smp_ops) {
-               mb();
-               smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK);
+       if (crash_ipi_function_ptr) {
+               crash_ipi_function_ptr(get_irq_regs());
+               return IRQ_HANDLED;
        }
+
+#ifdef CONFIG_DEBUGGER
+       debugger_ipi(get_irq_regs());
+#endif /* CONFIG_DEBUGGER */
+
+       return IRQ_HANDLED;
 }
-#endif
 
-static void stop_this_cpu(void *dummy)
+static irq_handler_t smp_ipi_action[] = {      /* irq handlers, indexed by PPC_MSG_* */
+       [PPC_MSG_CALL_FUNCTION] =  call_function_action,
+       [PPC_MSG_RESCHEDULE] = reschedule_action,
+       [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+       [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+};
+
+const char *smp_ipi_name[] = {                 /* irq names, indexed by PPC_MSG_* */
+       [PPC_MSG_CALL_FUNCTION] =  "ipi call function",
+       [PPC_MSG_RESCHEDULE] = "ipi reschedule",
+       [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+       [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+};
+
+/* optional function to request ipi, for controllers with >= 4 ipis */
+int smp_request_message_ipi(int virq, int msg)
 {
-       local_irq_disable();
-       while (1)
-               ;
+       int err;
+
+       if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+               return -EINVAL;
+       }
+#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
+       if (msg == PPC_MSG_DEBUGGER_BREAK) {
+               return 1;
+       }
+#endif
+       err = request_irq(virq, smp_ipi_action[msg], IRQF_DISABLED|IRQF_PERCPU,
+                         smp_ipi_name[msg], 0);
+       WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
+               virq, smp_ipi_name[msg], err);
+
+       return err;
 }
 
-void smp_send_stop(void)
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+struct cpu_messages {
+       int messages;                   /* pending messages: one byte per
+                                        * PPC_MSG_* type, set non-zero by
+                                        * smp_muxed_ipi_message_pass() and
+                                        * fetched-and-cleared by
+                                        * smp_ipi_demux() */
+       unsigned long data;             /* value handed to smp_ops->cause_ipi() */
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
+
+void smp_muxed_ipi_set_data(int cpu, unsigned long data)
 {
-       smp_call_function(stop_this_cpu, NULL, 1, 0);
-}
+       struct cpu_messages *info = &per_cpu(ipi_message, cpu);
 
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- * Stolen from the i386 version.
- */
-static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
+       info->data = data;
+}
 
-static struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       atomic_t started;
-       atomic_t finished;
-       int wait;
-} *call_data;
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+       struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+       char *message = (char *)&info->messages;
 
-/* delay of at least 8 seconds */
-#define SMP_CALL_TIMEOUT       8
+       message[msg] = 1;
+       mb();
+       smp_ops->cause_ipi(cpu, info->data);
+}
 
-/*
- * This function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- *
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-                      int wait)
-{ 
-       struct call_data_struct data;
-       int ret = -1, cpus;
-       u64 timeout;
-
-       /* Can deadlock when called with interrupts disabled */
-       WARN_ON(irqs_disabled());
-
-       if (unlikely(smp_ops == NULL))
-               return -1;
-
-       data.func = func;
-       data.info = info;
-       atomic_set(&data.started, 0);
-       data.wait = wait;
-       if (wait)
-               atomic_set(&data.finished, 0);
-
-       spin_lock(&call_lock);
-       /* Must grab online cpu count with preempt disabled, otherwise
-        * it can change. */
-       cpus = num_online_cpus() - 1;
-       if (!cpus) {
-               ret = 0;
-               goto out;
-       }
+irqreturn_t smp_ipi_demux(void)
+{
+       struct cpu_messages *info = &__get_cpu_var(ipi_message);
+       unsigned int all;
+
+       mb();   /* order any irq clear */
+
+       do {
+               all = xchg_local(&info->messages, 0);
+
+#ifdef __BIG_ENDIAN
+               if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
+                       generic_smp_call_function_interrupt();
+               if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
+                       scheduler_ipi();
+               if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
+                       generic_smp_call_function_single_interrupt();
+               if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK)))
+                       debug_ipi_action(0, NULL);
+#else
+#error Unsupported ENDIAN
+#endif
+       } while (info->messages);
 
-       call_data = &data;
-       smp_wmb();
-       /* Send a message to all other CPUs and wait for them to respond */
-       smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION);
-
-       timeout = get_tb() + (u64) SMP_CALL_TIMEOUT * tb_ticks_per_sec;
-
-       /* Wait for response */
-       while (atomic_read(&data.started) != cpus) {
-               HMT_low();
-               if (get_tb() >= timeout) {
-                       printk("smp_call_function on cpu %d: other cpus not "
-                              "responding (%d)\n", smp_processor_id(),
-                              atomic_read(&data.started));
-                       debugger(NULL);
-                       goto out;
-               }
-       }
+       return IRQ_HANDLED;
+}
+#endif /* CONFIG_PPC_SMP_MUXED_IPI */
 
-       if (wait) {
-               while (atomic_read(&data.finished) != cpus) {
-                       HMT_low();
-                       if (get_tb() >= timeout) {
-                               printk("smp_call_function on cpu %d: other "
-                                      "cpus not finishing (%d/%d)\n",
-                                      smp_processor_id(),
-                                      atomic_read(&data.finished),
-                                      atomic_read(&data.started));
-                               debugger(NULL);
-                               goto out;
-                       }
-               }
-       }
+/*
+ * Deliver IPI message @msg to @cpu. The platform's own
+ * smp_ops->message_pass hook is used when it is set; otherwise, when
+ * CONFIG_PPC_SMP_MUXED_IPI is enabled, the muxed IPI path is used.
+ * smp_ops is dereferenced unconditionally, so callers must make sure
+ * it is non-NULL.
+ */
+static inline void do_message_pass(int cpu, int msg)
+{
+       if (smp_ops->message_pass) {
+               smp_ops->message_pass(cpu, msg);
+               return;
+       }
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
+       smp_muxed_ipi_message_pass(cpu, msg);
+#endif
+}
 
-       ret = 0;
+/*
+ * Send a PPC_MSG_RESCHEDULE IPI to @cpu. Does nothing when no smp_ops
+ * have been registered.
+ */
+void smp_send_reschedule(int cpu)
+{
+       if (unlikely(!smp_ops))
+               return;
+
+       do_message_pass(cpu, PPC_MSG_RESCHEDULE);
+}
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
 
- out:
-       call_data = NULL;
-       HMT_medium();
-       spin_unlock(&call_lock);
-       return ret;
+void arch_send_call_function_single_ipi(int cpu)
+{
+       do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
 }
 
-EXPORT_SYMBOL(smp_call_function);
+/* Send a PPC_MSG_CALL_FUNCTION IPI to every CPU set in @mask. */
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+       unsigned int target;
+
+       for_each_cpu(target, mask) {
+               do_message_pass(target, PPC_MSG_CALL_FUNCTION);
+       }
+}
 
-void smp_call_function_interrupt(void)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+void smp_send_debugger_break(void)
 {
-       void (*func) (void *info);
-       void *info;
-       int wait;
+       int cpu;
+       int me = raw_smp_processor_id();
 
-       /* call_data will be NULL if the sender timed out while
-        * waiting on us to receive the call.
-        */
-       if (!call_data)
+       if (unlikely(!smp_ops))
                return;
 
-       func = call_data->func;
-       info = call_data->info;
-       wait = call_data->wait;
-
-       if (!wait)
-               smp_mb__before_atomic_inc();
+       for_each_online_cpu(cpu)
+               if (cpu != me)
+                       do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+}
+#endif
 
-       /*
-        * Notify initiating CPU that I've grabbed the data and am
-        * about to execute the function
-        */
-       atomic_inc(&call_data->started);
-       /*
-        * At this point the info structure may be out of scope unless wait==1
-        */
-       (*func)(info);
-       if (wait) {
-               smp_mb__before_atomic_inc();
-               atomic_inc(&call_data->finished);
+#ifdef CONFIG_KEXEC
+void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+       crash_ipi_function_ptr = crash_ipi_callback;
+       if (crash_ipi_callback) {
+               mb();
+               smp_send_debugger_break();
        }
 }
+#endif
 
-extern struct gettimeofday_struct do_gtod;
-
-struct thread_info *current_set[NR_CPUS];
+static void stop_this_cpu(void *dummy)
+{
+       /* Remove this CPU */
+       set_cpu_online(smp_processor_id(), false);
 
-DECLARE_PER_CPU(unsigned int, pvr);
+       local_irq_disable();
+       while (1)
+               ;
+}
 
-static void __devinit smp_store_cpu_info(int id)
+void smp_send_stop(void)
 {
-       per_cpu(pvr, id) = mfspr(SPRN_PVR);
+       smp_call_function(stop_this_cpu, NULL, 0);
 }
 
-static void __init smp_create_idle(unsigned int cpu)
-{
-       struct task_struct *p;
+struct thread_info *current_set[NR_CPUS];
 
-       /* create a process for the processor */
-       p = fork_idle(cpu);
-       if (IS_ERR(p))
-               panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
-#ifdef CONFIG_PPC64
-       paca[cpu].__current = p;
+static void __devinit smp_store_cpu_info(int id)
+{
+       per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
+#ifdef CONFIG_PPC_FSL_BOOK3E
+       per_cpu(next_tlbcam_idx, id)
+               = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
 #endif
-       current_set[cpu] = task_thread_info(p);
-       task_thread_info(p)->cpu = cpu;
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -340,23 +328,28 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
        smp_store_cpu_info(boot_cpuid);
        cpu_callin_map[boot_cpuid] = 1;
 
+       for_each_possible_cpu(cpu) {
+               zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
+                                       GFP_KERNEL, cpu_to_node(cpu));
+               zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
+                                       GFP_KERNEL, cpu_to_node(cpu));
+       }
+
+       cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+       cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+
        if (smp_ops)
-               max_cpus = smp_ops->probe();
+               if (smp_ops->probe)
+                       max_cpus = smp_ops->probe();
+               else
+                       max_cpus = NR_CPUS;
        else
                max_cpus = 1;
-       smp_space_timers(max_cpus);
-
-       for_each_possible_cpu(cpu)
-               if (cpu != boot_cpuid)
-                       smp_create_idle(cpu);
 }
 
 void __devinit smp_prepare_boot_cpu(void)
 {
        BUG_ON(smp_processor_id() != boot_cpuid);
-
-       cpu_set(boot_cpuid, cpu_online_map);
 #ifdef CONFIG_PPC64
        paca[boot_cpuid].__current = current;
 #endif
@@ -365,7 +358,7 @@ void __devinit smp_prepare_boot_cpu(void)
 
 #ifdef CONFIG_HOTPLUG_CPU
 /* State of each CPU during hotplug phases */
-DEFINE_PER_CPU(int, cpu_state) = { 0 };
+static DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
 int generic_cpu_disable(void)
 {
@@ -374,33 +367,11 @@ int generic_cpu_disable(void)
        if (cpu == boot_cpuid)
                return -EBUSY;
 
-       cpu_clear(cpu, cpu_online_map);
+       set_cpu_online(cpu, false);
 #ifdef CONFIG_PPC64
        vdso_data->processorCount--;
-       fixup_irqs(cpu_online_map);
-#endif
-       return 0;
-}
-
-int generic_cpu_enable(unsigned int cpu)
-{
-       /* Do the normal bootup if we haven't
-        * already bootstrapped. */
-       if (system_state != SYSTEM_RUNNING)
-               return -ENOSYS;
-
-       /* get the target out of it's holding state */
-       per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-       smp_wmb();
-
-       while (!cpu_online(cpu))
-               cpu_relax();
-
-#ifdef CONFIG_PPC64
-       fixup_irqs(cpu_online_map);
-       /* counter the irq disable in fixup_irqs */
-       local_irq_enable();
 #endif
+       migrate_irqs();
        return 0;
 }
 
@@ -422,41 +393,89 @@ void generic_mach_cpu_die(void)
        unsigned int cpu;
 
        local_irq_disable();
+       idle_task_exit();
        cpu = smp_processor_id();
        printk(KERN_DEBUG "CPU%d offline\n", cpu);
        __get_cpu_var(cpu_state) = CPU_DEAD;
        smp_wmb();
        while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
                cpu_relax();
+}
 
-#ifdef CONFIG_PPC64
-       flush_tlb_pending();
-#endif
-       cpu_set(cpu, cpu_online_map);
-       local_irq_enable();
+void generic_set_cpu_dead(unsigned int cpu)
+{
+       per_cpu(cpu_state, cpu) = CPU_DEAD;
 }
 #endif
 
-static int __devinit cpu_enable(unsigned int cpu)
+struct create_idle {
+       struct work_struct work;        /* work item that runs do_fork_idle() */
+       struct task_struct *idle;       /* cached idle task or fork_idle() result
+                                        * (checked with IS_ERR() by the user) */
+       struct completion done;         /* completed by do_fork_idle() */
+       int cpu;                        /* CPU the idle task is created for */
+};
+
+static void __cpuinit do_fork_idle(struct work_struct *work)
 {
-       if (smp_ops && smp_ops->cpu_enable)
-               return smp_ops->cpu_enable(cpu);
+       struct create_idle *c_idle =
+               container_of(work, struct create_idle, work);
 
-       return -ENOSYS;
+       c_idle->idle = fork_idle(c_idle->cpu);
+       complete(&c_idle->done);
 }
 
-int __cpuinit __cpu_up(unsigned int cpu)
+static int __cpuinit create_idle(unsigned int cpu)
 {
-       int c;
+       struct thread_info *ti;
+       struct create_idle c_idle = {
+               .cpu    = cpu,
+               .done   = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
+       };
+       INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
+
+       c_idle.idle = get_idle_for_cpu(cpu);
+
+       /* We can't use kernel_thread since we must avoid
+        * rescheduling the child. We use a workqueue because
+        * we want to fork from a kernel thread, not whatever
+        * userspace process happens to be trying to online us.
+        */
+       if (!c_idle.idle) {
+               schedule_work(&c_idle.work);
+               wait_for_completion(&c_idle.done);
+       } else
+               init_idle(c_idle.idle, cpu);
+       if (IS_ERR(c_idle.idle)) {              
+               pr_err("Failed fork for CPU %u: %li", cpu, PTR_ERR(c_idle.idle));
+               return PTR_ERR(c_idle.idle);
+       }
+       ti = task_thread_info(c_idle.idle);
 
-       secondary_ti = current_set[cpu];
-       if (!cpu_enable(cpu))
-               return 0;
+#ifdef CONFIG_PPC64
+       paca[cpu].__current = c_idle.idle;
+       paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+#endif
+       ti->cpu = cpu;
+       current_set[cpu] = ti;
+
+       return 0;
+}
+
+int __cpuinit __cpu_up(unsigned int cpu)
+{
+       int rc, c;
 
        if (smp_ops == NULL ||
            (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
                return -EINVAL;
 
+       /* Make sure we have an idle thread */
+       rc = create_idle(cpu);
+       if (rc)
+               return rc;
+
+       secondary_ti = current_set[cpu];
+
        /* Make sure callin-map entry is 0 (can be leftover a CPU
         * hotplug
         */
@@ -470,7 +489,11 @@ int __cpuinit __cpu_up(unsigned int cpu)
 
        /* wake up cpus */
        DBG("smp: kicking cpu %d\n", cpu);
-       smp_ops->kick_cpu(cpu);
+       rc = smp_ops->kick_cpu(cpu);
+       if (rc) {
+               pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
+               return rc;
+       }
 
        /*
         * wait to see if the cpu made a callin (is actually up).
@@ -486,17 +509,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
                 * CPUs can take much longer to come up in the
                 * hotplug case.  Wait five seconds.
                 */
-               for (c = 25; c && !cpu_callin_map[cpu]; c--) {
-                       msleep(200);
-               }
+               for (c = 5000; c && !cpu_callin_map[cpu]; c--)
+                       msleep(1);
 #endif
 
        if (!cpu_callin_map[cpu]) {
-               printk("Processor %u is stuck.\n", cpu);
+               printk(KERN_ERR "Processor %u is stuck.\n", cpu);
                return -ENOENT;
        }
 
-       printk("Processor %u found.\n", cpu);
+       DBG("Processor %u found.\n", cpu);
 
        if (smp_ops->give_timebase)
                smp_ops->give_timebase();
@@ -508,11 +530,70 @@ int __cpuinit __cpu_up(unsigned int cpu)
        return 0;
 }
 
+/* Return the value of the reg property corresponding to the given
+ * logical cpu, or -1 if the cpu's device-tree node or its "reg"
+ * property cannot be found.
+ */
+int cpu_to_core_id(int cpu)
+{
+       struct device_node *cpu_node = of_get_cpu_node(cpu, NULL);
+       const int *prop;
+       int core_id = -1;
+
+       if (cpu_node) {
+               prop = of_get_property(cpu_node, "reg", NULL);
+               if (prop)
+                       core_id = *prop;
+       }
+
+       /* Reached on every path, including when cpu_node is NULL */
+       of_node_put(cpu_node);
+       return core_id;
+}
+
+/*
+ * Helper routines for cpu to core mapping. Both are plain shifts by
+ * threads_shift: cpu_core_index_of_thread() maps a logical cpu number
+ * to the index of its core, and cpu_first_thread_of_core() maps a core
+ * index back to the logical cpu number of that core's first thread.
+ */
+int cpu_core_index_of_thread(int cpu)
+{
+       return cpu >> threads_shift;
+}
+EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);
+
+int cpu_first_thread_of_core(int core)
+{
+       return core << threads_shift;
+}
+EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
+
+/* Return what of_find_next_cache_node() reports for @cpu's
+ * device-tree node, or NULL if @cpu is not present or has no cpu
+ * node. The cpu node itself is released before returning; the callers
+ * in this file drop the returned node with of_node_put() when they
+ * are done with it.
+ *
+ * Must be called when no change can occur to cpu_present_mask,
+ * i.e. during cpu online or offline.
+ */
+static struct device_node *cpu_to_l2cache(int cpu)
+{
+       struct device_node *np;
+       struct device_node *cache;
+
+       if (!cpu_present(cpu))
+               return NULL;
+
+       np = of_get_cpu_node(cpu, NULL);
+       if (np == NULL)
+               return NULL;
+
+       cache = of_find_next_cache_node(np);
+
+       of_node_put(np);
+
+       return cache;
+}
 
 /* Activate a secondary processor. */
-int __devinit start_secondary(void *unused)
+void __devinit start_secondary(void *unused)
 {
        unsigned int cpu = smp_processor_id();
+       struct device_node *l2_cache;
+       int i, base;
 
        atomic_inc(&init_mm.mm_count);
        current->active_mm = &init_mm;
@@ -522,21 +603,54 @@ int __devinit start_secondary(void *unused)
        preempt_disable();
        cpu_callin_map[cpu] = 1;
 
-       smp_ops->setup_cpu(cpu);
+       if (smp_ops->setup_cpu)
+               smp_ops->setup_cpu(cpu);
        if (smp_ops->take_timebase)
                smp_ops->take_timebase();
 
-       if (system_state > SYSTEM_BOOTING)
-               snapshot_timebase();
+       secondary_cpu_time_init();
 
-       spin_lock(&call_lock);
-       cpu_set(cpu, cpu_online_map);
-       spin_unlock(&call_lock);
+#ifdef CONFIG_PPC64
+       if (system_state == SYSTEM_RUNNING)
+               vdso_data->processorCount++;
+#endif
+       ipi_call_lock();
+       notify_cpu_starting(cpu);
+       set_cpu_online(cpu, true);
+       /* Update sibling maps */
+       base = cpu_first_thread_sibling(cpu);
+       for (i = 0; i < threads_per_core; i++) {
+               if (cpu_is_offline(base + i))
+                       continue;
+               cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
+               cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
+
+               /* cpu_core_map should be a superset of
+                * cpu_sibling_map even if we don't have cache
+                * information, so update the former here, too.
+                */
+               cpumask_set_cpu(cpu, cpu_core_mask(base + i));
+               cpumask_set_cpu(base + i, cpu_core_mask(cpu));
+       }
+       l2_cache = cpu_to_l2cache(cpu);
+       for_each_online_cpu(i) {
+               struct device_node *np = cpu_to_l2cache(i);
+               if (!np)
+                       continue;
+               if (np == l2_cache) {
+                       cpumask_set_cpu(cpu, cpu_core_mask(i));
+                       cpumask_set_cpu(i, cpu_core_mask(cpu));
+               }
+               of_node_put(np);
+       }
+       of_node_put(l2_cache);
+       ipi_call_unlock();
 
        local_irq_enable();
 
        cpu_idle();
-       return 0;
+
+       BUG();
 }
 
 int setup_profiling_timer(unsigned int multiplier)
@@ -546,32 +660,78 @@ int setup_profiling_timer(unsigned int multiplier)
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
-       cpumask_t old_mask;
+       cpumask_var_t old_mask;
 
        /* We want the setup_cpu() here to be called from CPU 0, but our
         * init thread may have been "borrowed" by another CPU in the meantime
         * se we pin us down to CPU 0 for a short while
         */
-       old_mask = current->cpus_allowed;
-       set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid));
+       alloc_cpumask_var(&old_mask, GFP_NOWAIT);
+       cpumask_copy(old_mask, tsk_cpus_allowed(current));
+       set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
        
-       if (smp_ops)
+       if (smp_ops && smp_ops->setup_cpu)
                smp_ops->setup_cpu(boot_cpuid);
 
-       set_cpus_allowed(current, old_mask);
+       set_cpus_allowed_ptr(current, old_mask);
 
-       snapshot_timebases();
+       free_cpumask_var(old_mask);
+
+       if (smp_ops && smp_ops->bringup_done)
+               smp_ops->bringup_done();
 
        dump_numa_cpu_topology();
+
+}
+
+/*
+ * Return SD_ASYM_PACKING when the CPU has CPU_FTR_ASYM_SMT, logging
+ * the fact once; return 0 otherwise.
+ */
+int arch_sd_sibling_asym_packing(void)
+{
+       if (!cpu_has_feature(CPU_FTR_ASYM_SMT))
+               return 0;
+
+       printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+       return SD_ASYM_PACKING;
+}
 
 #ifdef CONFIG_HOTPLUG_CPU
 int __cpu_disable(void)
 {
-       if (smp_ops->cpu_disable)
-               return smp_ops->cpu_disable();
+       struct device_node *l2_cache;
+       int cpu = smp_processor_id();
+       int base, i;
+       int err;
 
-       return -ENOSYS;
+       if (!smp_ops->cpu_disable)
+               return -ENOSYS;
+
+       err = smp_ops->cpu_disable();
+       if (err)
+               return err;
+
+       /* Update sibling maps */
+       base = cpu_first_thread_sibling(cpu);
+       for (i = 0; i < threads_per_core; i++) {
+               cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
+               cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
+               cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
+               cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
+       }
+
+       l2_cache = cpu_to_l2cache(cpu);
+       for_each_present_cpu(i) {
+               struct device_node *np = cpu_to_l2cache(i);
+               if (!np)
+                       continue;
+               if (np == l2_cache) {
+                       cpumask_clear_cpu(cpu, cpu_core_mask(i));
+                       cpumask_clear_cpu(i, cpu_core_mask(cpu));
+               }
+               of_node_put(np);
+       }
+       of_node_put(l2_cache);
+
+
+       return 0;
 }
 
 void __cpu_die(unsigned int cpu)
@@ -579,4 +739,26 @@ void __cpu_die(unsigned int cpu)
        if (smp_ops->cpu_die)
                smp_ops->cpu_die(cpu);
 }
+
+static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex);
+
+/*
+ * Take powerpc_cpu_hotplug_driver_mutex. May sleep; pair every call
+ * with cpu_hotplug_driver_unlock().
+ *
+ * Defined with an explicit (void) parameter list so the definition is
+ * a real prototype rather than an old-style declarator.
+ */
+void cpu_hotplug_driver_lock(void)
+{
+       mutex_lock(&powerpc_cpu_hotplug_driver_mutex);
+}
+
+/*
+ * Release powerpc_cpu_hotplug_driver_mutex taken by
+ * cpu_hotplug_driver_lock().
+ *
+ * Defined with an explicit (void) parameter list so the definition is
+ * a real prototype rather than an old-style declarator.
+ */
+void cpu_hotplug_driver_unlock(void)
+{
+       mutex_unlock(&powerpc_cpu_hotplug_driver_mutex);
+}
+
+void cpu_die(void)
+{
+       if (ppc_md.cpu_die)
+               ppc_md.cpu_die();
+
+       /* If ppc_md.cpu_die is not set, or the call above returns,
+        * we re-enter start_secondary
+        */
+       start_secondary_resume();
+}
+
 #endif