nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - arch/x86/kernel/apic_64.c
x86, clockevents: add C1E aware idle function
[linux-2.6.git] / arch / x86 / kernel / apic_64.c
index 915808bd8a2ad8b76bd8546add7fd5b16aec3962..a5cc8447cf4dde3cde1ea58bbb3620e828833e26 100644 (file)
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
-#include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
+#include <linux/module.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
+#include <asm/hpet.h>
 #include <asm/pgalloc.h>
-#include <asm/mach_apic.h>
 #include <asm/nmi.h>
 #include <asm/idle.h>
 #include <asm/proto.h>
 #include <asm/timex.h>
-#include <asm/hpet.h>
 #include <asm/apic.h>
 
-int apic_verbosity;
-int disable_apic_timer __cpuinitdata;
+#include <mach_ipi.h>
+#include <mach_apic.h>
+
+static int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
 int disable_apic;
 
-/* Local APIC timer works in C2? */
+/* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
+/*
+ * Debug level, exported for io_apic.c
+ */
+int apic_verbosity;
+
 static struct resource lapic_resource = {
        .name = "Local APIC",
        .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
@@ -77,6 +83,14 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
+static unsigned long apic_phys;
+
+unsigned long mp_lapic_addr;
+
+DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+unsigned int __cpuinitdata maxcpus = NR_CPUS;
 /*
  * Get the LAPIC version
  */
@@ -130,7 +144,7 @@ u32 safe_apic_wait_icr_idle(void)
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
-void enable_NMI_through_LVT0(void *dummy)
+void __cpuinit enable_NMI_through_LVT0(void)
 {
        unsigned int v;
 
@@ -187,17 +201,35 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 }
 
 /*
- * Setup extended LVT (K8 specific)
+ * Setup extended LVT, AMD specific (K8, family 10h)
+ *
+ * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
+ * MCE interrupts are supported. Thus MCE offset must be set to 0.
  */
-void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
-                            unsigned char msg_type, unsigned char mask)
+
+#define APIC_EILVT_LVTOFF_MCE 0
+#define APIC_EILVT_LVTOFF_IBS 1
+
+static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
 {
-       unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
+       unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
        unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
 
        apic_write(reg, v);
 }
 
+u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+{
+       setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
+       return APIC_EILVT_LVTOFF_MCE;
+}
+
+u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+{
+       setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
+       return APIC_EILVT_LVTOFF_IBS;
+}
+
 /*
  * Program the next event, relative to now
  */
@@ -328,7 +360,8 @@ static void __init calibrate_APIC_clock(void)
                result / 1000 / 1000, result / 1000 % 1000);
 
        /* Calculate the scaled math multiplication factor */
-       lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
+       lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
+                                      lapic_clockevent.shift);
        lapic_clockevent.max_delta_ns =
                clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
        lapic_clockevent.min_delta_ns =
@@ -337,6 +370,11 @@ static void __init calibrate_APIC_clock(void)
        calibration_result = result / HZ;
 }
 
+/*
+ * Setup the boot APIC
+ *
+ * Calibrate and verify the result.
+ */
 void __init setup_boot_APIC_clock(void)
 {
        /*
@@ -348,14 +386,28 @@ void __init setup_boot_APIC_clock(void)
        if (disable_apic_timer) {
                printk(KERN_INFO "Disabling APIC timer\n");
                /* No broadcast on UP ! */
-               if (num_possible_cpus() > 1)
+               if (num_possible_cpus() > 1) {
+                       lapic_clockevent.mult = 1;
                        setup_APIC_timer();
+               }
                return;
        }
 
        printk(KERN_INFO "Using local APIC timer interrupts.\n");
        calibrate_APIC_clock();
 
+       /*
+        * Do a sanity check on the APIC calibration result
+        */
+       if (calibration_result < (1000000 / HZ)) {
+               printk(KERN_WARNING
+                      "APIC frequency too slow, disabling apic timer\n");
+               /* No broadcast on UP ! */
+               if (num_possible_cpus() > 1)
+                       setup_APIC_timer();
+               return;
+       }
+
        /*
         * If nmi_watchdog is set to IO_APIC, we need the
         * PIT/HPET going.  Otherwise register lapic as a dummy
@@ -370,31 +422,8 @@ void __init setup_boot_APIC_clock(void)
        setup_APIC_timer();
 }
 
-/*
- * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
- * C1E flag only in the secondary CPU, so when we detect the wreckage
- * we already have enabled the boot CPU local apic timer. Check, if
- * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
- * set the DUMMY flag again and force the broadcast mode in the
- * clockevents layer.
- */
-void __cpuinit check_boot_apic_timer_broadcast(void)
-{
-       if (!disable_apic_timer ||
-           (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
-               return;
-
-       printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
-       lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
-
-       local_irq_enable();
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id);
-       local_irq_disable();
-}
-
 void __cpuinit setup_secondary_APIC_clock(void)
 {
-       check_boot_apic_timer_broadcast();
        setup_APIC_timer();
 }
 
@@ -481,9 +510,14 @@ int setup_profiling_timer(unsigned int multiplier)
  */
 void clear_local_APIC(void)
 {
-       int maxlvt = lapic_get_maxlvt();
+       int maxlvt;
        u32 v;
 
+       /* APIC hasn't been mapped yet */
+       if (!apic_phys)
+               return;
+
+       maxlvt = lapic_get_maxlvt();
        /*
         * Masking an LVT entry can trigger a local APIC error
         * if the vector is zero. Mask LVTERR first to prevent this.
@@ -592,10 +626,10 @@ int __init verify_local_APIC(void)
        /*
         * The ID register is read/write in a real APIC.
         */
-       reg0 = apic_read(APIC_ID);
+       reg0 = read_apic_id();
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
        apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-       reg1 = apic_read(APIC_ID);
+       reg1 = read_apic_id();
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
        apic_write(APIC_ID, reg0);
        if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -677,9 +711,10 @@ void __init init_bsp_APIC(void)
  */
 void __cpuinit setup_local_APIC(void)
 {
-       unsigned int value, maxlvt;
+       unsigned int value;
        int i, j;
 
+       preempt_disable();
        value = apic_read(APIC_LVR);
 
        BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
@@ -773,25 +808,24 @@ void __cpuinit setup_local_APIC(void)
        else
                value = APIC_DM_NMI | APIC_LVT_MASKED;
        apic_write(APIC_LVT1, value);
+       preempt_enable();
+}
 
-       {
-               unsigned oldvalue;
-               maxlvt = lapic_get_maxlvt();
-               oldvalue = apic_read(APIC_ESR);
-               value = ERROR_APIC_VECTOR;      // enables sending errors
-               apic_write(APIC_LVTERR, value);
-               /*
-                * spec says clear errors after enabling vector.
-                */
-               if (maxlvt > 3)
-                       apic_write(APIC_ESR, 0);
-               value = apic_read(APIC_ESR);
-               if (value != oldvalue)
-                       apic_printk(APIC_VERBOSE,
-                       "ESR value after enabling vector: %08x, after %08x\n",
-                       oldvalue, value);
-       }
+static void __cpuinit lapic_setup_esr(void)
+{
+       unsigned maxlvt = lapic_get_maxlvt();
+
+       apic_write(APIC_LVTERR, ERROR_APIC_VECTOR);
+       /*
+        * spec says clear errors after enabling vector.
+        */
+       if (maxlvt > 3)
+               apic_write(APIC_ESR, 0);
+}
 
+void __cpuinit end_local_APIC_setup(void)
+{
+       lapic_setup_esr();
        nmi_watchdog_default();
        setup_apic_nmi_watchdog(NULL);
        apic_pm_activate();
@@ -811,17 +845,39 @@ static int __init detect_init_APIC(void)
        }
 
        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-       boot_cpu_id = 0;
+       boot_cpu_physical_apicid = 0;
        return 0;
 }
 
+void __init early_init_lapic_mapping(void)
+{
+       unsigned long apic_phys;
+
+       /*
+        * If no local APIC can be found then bail out:
+        * it means there is no MP table and no MADT.
+        */
+       if (!smp_found_config)
+               return;
+
+       apic_phys = mp_lapic_addr;
+
+       set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+       apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+                                APIC_BASE, apic_phys);
+
+       /*
+        * Fetch the APIC ID of the BSP in case we have a
+        * default configuration (or the MP table is broken).
+        */
+       boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+}
+
 /**
  * init_apic_mappings - initialize APIC mappings
  */
 void __init init_apic_mappings(void)
 {
-       unsigned long apic_phys;
-
        /*
         * If no local APIC can be found then set up a fake all
         * zeroes page to simulate the local APIC and another
@@ -837,16 +893,11 @@ void __init init_apic_mappings(void)
        apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
                                APIC_BASE, apic_phys);
 
-       /* Put local APIC into the resource map. */
-       lapic_resource.start = apic_phys;
-       lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-       insert_resource(&iomem_resource, &lapic_resource);
-
        /*
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
-       boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+       boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 }
 
 /*
@@ -867,11 +918,20 @@ int __init APIC_init_uniprocessor(void)
 
        verify_local_APIC();
 
-       phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
-       apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
+       phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+       apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
 
        setup_local_APIC();
 
+       /*
+        * Now enable IO-APICs, actually call clear_IO_APIC
+        * We need clear_IO_APIC before enabling vector on BP
+        */
+       if (!skip_ioapic_setup && nr_ioapics)
+               enable_IO_APIC();
+
+       end_local_APIC_setup();
+
        if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
        else
@@ -976,6 +1036,52 @@ void disconnect_bsp_APIC(int virt_wire_setup)
        apic_write(APIC_LVT1, value);
 }
 
+void __cpuinit generic_processor_info(int apicid, int version)
+{
+       int cpu;
+       cpumask_t tmp_map;
+
+       if (num_processors >= NR_CPUS) {
+               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+                      " Processor ignored.\n", NR_CPUS);
+               return;
+       }
+
+       if (num_processors >= maxcpus) {
+               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+                      " Processor ignored.\n", maxcpus);
+               return;
+       }
+
+       num_processors++;
+       cpus_complement(tmp_map, cpu_present_map);
+       cpu = first_cpu(tmp_map);
+
+       physid_set(apicid, phys_cpu_present_map);
+       if (apicid == boot_cpu_physical_apicid) {
+               /*
+                * x86_bios_cpu_apicid is required to have processors listed
+                * in same order as logical cpu numbers. Hence the first
+                * entry is BSP, and so on.
+                */
+               cpu = 0;
+       }
+       /* are we being called early in kernel startup? */
+       if (x86_cpu_to_apicid_early_ptr) {
+               u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
+               u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+
+               cpu_to_apicid[cpu] = apicid;
+               bios_cpu_apicid[cpu] = apicid;
+       } else {
+               per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+               per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
+       }
+
+       cpu_set(cpu, cpu_possible_map);
+       cpu_set(cpu, cpu_present_map);
+}
+
 /*
  * Power management
  */
@@ -1012,7 +1118,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 
        maxlvt = lapic_get_maxlvt();
 
-       apic_pm_state.apic_id = apic_read(APIC_ID);
+       apic_pm_state.apic_id = read_apic_id();
        apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
        apic_pm_state.apic_ldr = apic_read(APIC_LDR);
        apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1084,8 +1190,8 @@ static struct sysdev_class lapic_sysclass = {
 };
 
 static struct sys_device device_lapic = {
-       .id             = 0,
-       .cls            = &lapic_sysclass,
+       .id     = 0,
+       .cls    = &lapic_sysclass,
 };
 
 static void __cpuinit apic_pm_activate(void)
@@ -1096,9 +1202,11 @@ static void __cpuinit apic_pm_activate(void)
 static int __init init_lapic_sysfs(void)
 {
        int error;
+
        if (!cpu_has_apic)
                return 0;
        /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+
        error = sysdev_class_register(&lapic_sysclass);
        if (!error)
                error = sysdev_register(&device_lapic);
@@ -1125,21 +1233,44 @@ __cpuinit int apic_is_clustered_box(void)
 {
        int i, clusters, zeros;
        unsigned id;
+       u16 *bios_cpu_apicid;
        DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
 
+       /*
+        * There is no box of this kind with AMD CPUs yet.
+        * Some AMD boxes with quad-core CPUs and 8 sockets have
+        * APIC IDs in [4, 0x23] or [8, 0x27] and could be mistaken
+        * for one; vSMP boxes still need checking...
+        */
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+               return 0;
+
+       bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
        bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
        for (i = 0; i < NR_CPUS; i++) {
-               id = bios_cpu_apicid[i];
+               /* are we being called early in kernel startup? */
+               if (bios_cpu_apicid) {
+                       id = bios_cpu_apicid[i];
+               }
+               else if (i < nr_cpu_ids) {
+                       if (cpu_present(i))
+                               id = per_cpu(x86_bios_cpu_apicid, i);
+                       else
+                               continue;
+               }
+               else
+                       break;
+
                if (id != BAD_APICID)
                        __set_bit(APIC_CLUSTERID(id), clustermap);
        }
 
        /* Problem:  Partially populated chassis may not have CPUs in some of
         * the APIC clusters they have been allocated.  Only present CPUs have
-        * bios_cpu_apicid entries, thus causing zeroes in the bitmap.  Since
-        * clusters are allocated sequentially, count zeros only if they are
-        * bounded by ones.
+        * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+        * Since clusters are allocated sequentially, count zeros only if
+        * they are bounded by ones.
         */
        clusters = 0;
        zeros = 0;
@@ -1151,6 +1282,12 @@ __cpuinit int apic_is_clustered_box(void)
                        ++zeros;
        }
 
+       /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+        * not guaranteed to be synced between boards
+        */
+       if (is_vsmp_box() && clusters > 1)
+               return 1;
+
        /*
         * If clusters > 2, then should be multi-chassis.
         * May have to revisit this when multi-core + hyperthreaded CPUs come
@@ -1186,7 +1323,7 @@ early_param("apic", apic_set_verbosity);
 static __init int setup_disableapic(char *str)
 {
        disable_apic = 1;
-       clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+       clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
        return 0;
 }
 early_param("disableapic", setup_disableapic);
@@ -1222,3 +1359,21 @@ static __init int setup_apicpmtimer(char *s)
 }
 __setup("apicpmtimer", setup_apicpmtimer);
 
+static int __init lapic_insert_resource(void)
+{
+       if (!apic_phys)
+               return -1;
+
+       /* Put local APIC into the resource map. */
+       lapic_resource.start = apic_phys;
+       lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+       insert_resource(&iomem_resource, &lapic_resource);
+
+       return 0;
+}
+
+/*
+ * insert_resource() must be called after e820_reserve_resources(),
+ * which uses request_resource().
+ */
+late_initcall(lapic_insert_resource);