#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
-#include <asm/vmi.h>
+#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
-static int low_mappings;
#endif
/* State of each CPU */
static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
+
+/*
+ * We need this for trampoline_base protection from concurrent accesses when
+ * off- and onlining cores wildly.
+ */
+static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
+
+void cpu_hotplug_driver_lock()
+{
+ mutex_lock(&x86_cpu_hotplug_driver_mutex);
+}
+
+void cpu_hotplug_driver_unlock()
+{
+ mutex_unlock(&x86_cpu_hotplug_driver_mutex);
+}
+
+ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
+ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
#else
static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
*/
smp_store_cpu_info(cpuid);
+ /*
+ * This must be done before setting cpu_online_mask
+ * or calling notify_cpu_starting.
+ */
+ set_cpu_sibling_map(raw_smp_processor_id());
+ wmb();
+
notify_cpu_starting(cpuid);
/*
* fragile that we want to limit the things done here to the
* most necessary things.
*/
- vmi_bringup();
cpu_init();
preempt_disable();
smp_callin();
+#ifdef CONFIG_X86_32
+ /* switch away from the initial page table */
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+#endif
+
/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
/*
*/
check_tsc_sync_target();
- if (nmi_watchdog == NMI_IO_APIC) {
- legacy_pic->chip->mask(0);
- enable_NMI_through_LVT0();
- legacy_pic->chip->unmask(0);
- }
-
-#ifdef CONFIG_X86_32
- while (low_mappings)
- cpu_relax();
- __flush_tlb_all();
-#endif
-
- /* This must be done before setting cpu_online_mask */
- set_cpu_sibling_map(raw_smp_processor_id());
- wmb();
-
/*
* We need to hold call_lock, so there is no inconsistency
* between the time smp_call_function() determines number of
identify_secondary_cpu(c);
}
+static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+{
+ struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
+ struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
+
+ cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
+ cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
+ cpumask_set_cpu(cpu1, c2->llc_shared_map);
+ cpumask_set_cpu(cpu2, c1->llc_shared_map);
+}
+
void __cpuinit set_cpu_sibling_map(int cpu)
{
for_each_cpu(i, cpu_sibling_setup_mask) {
struct cpuinfo_x86 *o = &cpu_data(i);
- if (c->phys_proc_id == o->phys_proc_id &&
- c->cpu_core_id == o->cpu_core_id) {
- cpumask_set_cpu(i, cpu_sibling_mask(cpu));
- cpumask_set_cpu(cpu, cpu_sibling_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, c->llc_shared_map);
- cpumask_set_cpu(cpu, o->llc_shared_map);
+ if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ if (c->phys_proc_id == o->phys_proc_id &&
+ c->compute_unit_id == o->compute_unit_id)
+ link_thread_siblings(cpu, i);
+ } else if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_core_id == o->cpu_core_id) {
+ link_thread_siblings(cpu, i);
}
}
} else {
cpumask_set_cpu(cpu, c->llc_shared_map);
- if (current_cpu_data.x86_max_cores == 1) {
+ if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
c->booted_cores = 1;
return;
static void __cpuinit announce_cpu(int cpu, int apicid)
{
static int current_node = -1;
- int node = cpu_to_node(cpu);
+ int node = early_cpu_to_node(cpu);
if (system_state == SYSTEM_BOOTING) {
if (node != current_node) {
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
- INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
+ INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
goto do_rest;
}
- if (!keventd_up() || current_is_keventd())
- c_idle.work.func(&c_idle.work);
- else {
- schedule_work(&c_idle.work);
- wait_for_completion(&c_idle.done);
- }
+ schedule_work(&c_idle.work);
+ wait_for_completion(&c_idle.done);
if (IS_ERR(c_idle.idle)) {
printk("failed fork for CPU %d\n", cpu);
if (cpumask_test_cpu(cpu, cpu_callin_mask))
break; /* It has booted */
udelay(100);
+ /*
+ * Allow other tasks to run while we wait for the
+ * AP to come online. This also gives a chance
+ * for the MTRR work(triggered by the AP coming online)
+ * to be completed in the stop machine context.
+ */
+ schedule();
}
if (cpumask_test_cpu(cpu, cpu_callin_mask))
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-#ifdef CONFIG_X86_32
- /* init low mem mapping */
- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
- flush_tlb_all();
- low_mappings = 1;
-
- err = do_boot_cpu(apicid, cpu);
-
- zap_low_mappings(false);
- low_mappings = 0;
-#else
err = do_boot_cpu(apicid, cpu);
-#endif
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
printk(KERN_INFO "SMP mode deactivated.\n");
smpboot_clear_io_apic();
- localise_nmi_watchdog();
-
connect_bsp_APIC();
setup_local_APIC();
end_local_APIC_setup();
preempt_disable();
smp_cpu_index_default();
- current_cpu_data = boot_cpu_data;
+ memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();
/*
}
set_cpu_sibling_map(0);
- enable_IR_x2apic();
- default_setup_apic_routing();
if (smp_sanity_check(max_cpus) < 0) {
printk(KERN_INFO "SMP disabled\n");
goto out;
}
+ default_setup_apic_routing();
+
preempt_disable();
if (read_apic_id() != boot_cpu_physical_apicid) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
preempt_enable();
}
+void arch_disable_nonboot_cpus_begin(void)
+{
+ /*
+ * Avoid the smp alternatives switch during the disable_nonboot_cpus().
+ * In the suspend path, we will be back in the SMP mode shortly anyways.
+ */
+ skip_smp_alternatives = true;
+}
+
+void arch_disable_nonboot_cpus_end(void)
+{
+ skip_smp_alternatives = false;
+}
+
void arch_enable_nonboot_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
#endif
- check_nmi_watchdog();
mtrr_aps_init();
}
if (cpu == 0)
return -EBUSY;
- if (nmi_watchdog == NMI_LOCAL_APIC)
- stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
cpu_disable_common();
{
idle_task_exit();
reset_lazy_tlbstate();
- irq_ctx_exit(raw_smp_processor_id());
c1e_remove_cpu(raw_smp_processor_id());
mb();
/* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
+ __this_cpu_write(cpu_state, CPU_DEAD);
/*
* With physical CPU hotplug, we should halt the cpu
local_irq_disable();
}
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
+static inline void mwait_play_dead(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int highest_cstate = 0;
+ unsigned int highest_subcstate = 0;
+ int i;
+ void *mwait_ptr;
+
+ if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT))
+ return;
+ if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
+ return;
+ if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
+ return;
+
+ eax = CPUID_MWAIT_LEAF;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ /*
+ * eax will be 0 if EDX enumeration is not valid.
+ * Initialized below to cstate, sub_cstate value when EDX is valid.
+ */
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
+ eax = 0;
+ } else {
+ edx >>= MWAIT_SUBSTATE_SIZE;
+ for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+ if (edx & MWAIT_SUBSTATE_MASK) {
+ highest_cstate = i;
+ highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+ }
+ }
+ eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+ (highest_subcstate - 1);
+ }
+
+ /*
+ * This should be a memory location in a cache line which is
+ * unlikely to be touched by other processors. The actual
+ * content is immaterial as it is not actually modified in any way.
+ */
+ mwait_ptr = ¤t_thread_info()->flags;
+
+ wbinvd();
+
+ while (1) {
+ /*
+ * The CLFLUSH is a workaround for erratum AAI65 for
+ * the Xeon 7400 series. It's not clear it is actually
+ * needed, but it should be harmless in either case.
+ * The WBINVD is insufficient due to the spurious-wakeup
+ * case where we return around the loop.
+ */
+ clflush(mwait_ptr);
+ __monitor(mwait_ptr, 0, 0);
+ mb();
+ __mwait(eax, 0);
+ }
+}
+
+static inline void hlt_play_dead(void)
+{
+ if (__this_cpu_read(cpu_info.x86) >= 4)
+ wbinvd();
+
+ while (1) {
+ native_halt();
+ }
+}
+
void native_play_dead(void)
{
play_dead_common();
tboot_shutdown(TB_SHUTDOWN_WFS);
- wbinvd_halt();
+
+ mwait_play_dead(); /* Only returns on failure */
+ hlt_play_dead();
}
#else /* ... !CONFIG_HOTPLUG_CPU */