Merge branch 'cpuidle-perf-events' into idle-test
Len Brown [Wed, 12 Jan 2011 23:06:19 +0000 (18:06 -0500)]
arch/arm/mach-omap2/cpuidle34xx.c
arch/ia64/include/asm/processor.h
arch/ia64/kernel/process.c
arch/sh/kernel/cpu/shmobile/cpuidle.c
arch/x86/include/asm/processor.h
arch/x86/kernel/process.c
drivers/acpi/processor_core.c
drivers/acpi/processor_idle.c
drivers/cpuidle/cpuidle.c
drivers/idle/intel_idle.c
include/linux/cpuidle.h

index f3e043f..5bdfc44 100644 (file)
@@ -47,6 +47,8 @@
 
 #define OMAP3_STATE_MAX OMAP3_STATE_C7
 
+#define CPUIDLE_FLAG_CHECK_BM  0x10000 /* use omap3_enter_idle_bm() */
+
 struct omap3_processor_cx {
        u8 valid;
        u8 type;
index 348e44d..03afe79 100644 (file)
@@ -717,8 +717,9 @@ prefetchw (const void *x)
 #define spin_lock_prefetch(x)  prefetchw(x)
 
 extern unsigned long boot_option_idle_override;
-extern unsigned long idle_halt;
-extern unsigned long idle_nomwait;
+
+enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT,
+                        IDLE_NOMWAIT, IDLE_POLL};
 
 #endif /* !__ASSEMBLY__ */
 
index 16f1c7b..6d33c5c 100644 (file)
 
 void (*ia64_mark_idle)(int);
 
-unsigned long boot_option_idle_override = 0;
+unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
-unsigned long idle_halt;
-EXPORT_SYMBOL(idle_halt);
-unsigned long idle_nomwait;
-EXPORT_SYMBOL(idle_nomwait);
 void (*pm_idle) (void);
 EXPORT_SYMBOL(pm_idle);
 void (*pm_power_off) (void);
index 83972aa..c19e2a9 100644 (file)
@@ -81,7 +81,6 @@ void sh_mobile_setup_cpuidle(void)
        state->target_residency = 1 * 2;
        state->power_usage = 3;
        state->flags = 0;
-       state->flags |= CPUIDLE_FLAG_SHALLOW;
        state->flags |= CPUIDLE_FLAG_TIME_VALID;
        state->enter = cpuidle_sleep_enter;
 
index c6efecf..521acfc 100644 (file)
@@ -761,10 +761,11 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c);
 extern void init_c1e_mask(void);
 
 extern unsigned long           boot_option_idle_override;
-extern unsigned long           idle_halt;
-extern unsigned long           idle_nomwait;
 extern bool                    c1e_detected;
 
+enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
+                        IDLE_POLL, IDLE_FORCE_MWAIT};
+
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
index 67e96e6..d8286ed 100644 (file)
 #include <asm/i387.h>
 #include <asm/debugreg.h>
 
-unsigned long idle_halt;
-EXPORT_SYMBOL(idle_halt);
-unsigned long idle_nomwait;
-EXPORT_SYMBOL(idle_nomwait);
-
 struct kmem_cache *task_xstate_cachep;
 EXPORT_SYMBOL_GPL(task_xstate_cachep);
 
@@ -327,7 +322,7 @@ long sys_execve(const char __user *name,
 /*
  * Idle related variables and functions
  */
-unsigned long boot_option_idle_override = 0;
+unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
 /*
@@ -505,7 +500,6 @@ static void poll_idle(void)
  *
  * idle=mwait overrides this decision and forces the usage of mwait.
  */
-static int __cpuinitdata force_mwait;
 
 #define MWAIT_INFO                     0x05
 #define MWAIT_ECX_EXTENDED_INFO                0x01
@@ -515,7 +509,7 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
 {
        u32 eax, ebx, ecx, edx;
 
-       if (force_mwait)
+       if (boot_option_idle_override == IDLE_FORCE_MWAIT)
                return 1;
 
        if (c->cpuid_level < MWAIT_INFO)
@@ -635,9 +629,10 @@ static int __init idle_setup(char *str)
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
-       } else if (!strcmp(str, "mwait"))
-               force_mwait = 1;
-       else if (!strcmp(str, "halt")) {
+               boot_option_idle_override = IDLE_POLL;
+       } else if (!strcmp(str, "mwait")) {
+               boot_option_idle_override = IDLE_FORCE_MWAIT;
+       } else if (!strcmp(str, "halt")) {
                /*
                 * When the boot option of idle=halt is added, halt is
                 * forced to be used for CPU idle. In such case CPU C2/C3
@@ -646,8 +641,7 @@ static int __init idle_setup(char *str)
                 * the boot_option_idle_override.
                 */
                pm_idle = default_idle;
-               idle_halt = 1;
-               return 0;
+               boot_option_idle_override = IDLE_HALT;
        } else if (!strcmp(str, "nomwait")) {
                /*
                 * If the boot option of "idle=nomwait" is added,
@@ -655,12 +649,10 @@ static int __init idle_setup(char *str)
                 * states. In such case boot_option_idle_override is set
                 * to IDLE_NOMWAIT.
                 */
-               idle_nomwait = 1;
-               return 0;
+               boot_option_idle_override = IDLE_NOMWAIT;
        } else
                return -1;
 
-       boot_option_idle_override = 1;
        return 0;
 }
 early_param("idle", idle_setup);
index bec561c..3c1a2fe 100644 (file)
@@ -23,7 +23,7 @@ static int set_no_mwait(const struct dmi_system_id *id)
 {
        printk(KERN_NOTICE PREFIX "%s detected - "
                "disabling mwait for CPU C-states\n", id->ident);
-       idle_nomwait = 1;
+       boot_option_idle_override = IDLE_NOMWAIT;
        return 0;
 }
 
@@ -283,7 +283,7 @@ acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in)
 {
        acpi_status status = AE_OK;
 
-       if (idle_nomwait) {
+       if (boot_option_idle_override == IDLE_NOMWAIT) {
                /*
                 * If mwait is disabled for CPU C-states, the C2C3_FFH access
                 * mode will be disabled in the parameter of _PDC object.
index a765b82..d615b7d 100644 (file)
@@ -79,6 +79,13 @@ module_param(bm_check_disable, uint, 0000);
 static unsigned int latency_factor __read_mostly = 2;
 module_param(latency_factor, uint, 0644);
 
+static int disabled_by_idle_boot_param(void)
+{
+       return boot_option_idle_override == IDLE_POLL ||
+               boot_option_idle_override == IDLE_FORCE_MWAIT ||
+               boot_option_idle_override == IDLE_HALT;
+}
+
 /*
  * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
  * For now disable this. Probably a bug somewhere else.
@@ -455,7 +462,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
                                continue;
                        }
                        if (cx.type == ACPI_STATE_C1 &&
-                                       (idle_halt || idle_nomwait)) {
+                           (boot_option_idle_override == IDLE_NOMWAIT)) {
                                /*
                                 * In most cases the C1 space_id obtained from
                                 * _CST object is FIXED_HARDWARE access mode.
@@ -1016,7 +1023,6 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
                state->flags = 0;
                switch (cx->type) {
                        case ACPI_STATE_C1:
-                       state->flags |= CPUIDLE_FLAG_SHALLOW;
                        if (cx->entry_method == ACPI_CSTATE_FFH)
                                state->flags |= CPUIDLE_FLAG_TIME_VALID;
 
@@ -1025,16 +1031,13 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
                        break;
 
                        case ACPI_STATE_C2:
-                       state->flags |= CPUIDLE_FLAG_BALANCED;
                        state->flags |= CPUIDLE_FLAG_TIME_VALID;
                        state->enter = acpi_idle_enter_simple;
                        dev->safe_state = state;
                        break;
 
                        case ACPI_STATE_C3:
-                       state->flags |= CPUIDLE_FLAG_DEEP;
                        state->flags |= CPUIDLE_FLAG_TIME_VALID;
-                       state->flags |= CPUIDLE_FLAG_CHECK_BM;
                        state->enter = pr->flags.bm_check ?
                                        acpi_idle_enter_bm :
                                        acpi_idle_enter_simple;
@@ -1058,7 +1061,7 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 {
        int ret = 0;
 
-       if (boot_option_idle_override)
+       if (disabled_by_idle_boot_param())
                return 0;
 
        if (!pr)
@@ -1089,19 +1092,10 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
        acpi_status status = 0;
        static int first_run;
 
-       if (boot_option_idle_override)
+       if (disabled_by_idle_boot_param())
                return 0;
 
        if (!first_run) {
-               if (idle_halt) {
-                       /*
-                        * When the boot option of "idle=halt" is added, halt
-                        * is used for CPU IDLE.
-                        * In such case C2/C3 is meaningless. So the max_cstate
-                        * is set to one.
-                        */
-                       max_cstate = 1;
-               }
                dmi_check_system(processor_power_dmi_table);
                max_cstate = acpi_processor_cstate_check(max_cstate);
                if (max_cstate < ACPI_C_STATES_MAX)
@@ -1142,7 +1136,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 int acpi_processor_power_exit(struct acpi_processor *pr,
                              struct acpi_device *device)
 {
-       if (boot_option_idle_override)
+       if (disabled_by_idle_boot_param())
                return 0;
 
        cpuidle_unregister_device(&pr->power.dev);
index e4855c3..bf50924 100644 (file)
@@ -161,6 +161,45 @@ void cpuidle_resume_and_unlock(void)
 
 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
 
+#ifdef CONFIG_ARCH_HAS_CPU_RELAX
+static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+       ktime_t t1, t2;
+       s64 diff;
+       int ret;
+
+       t1 = ktime_get();
+       local_irq_enable();
+       while (!need_resched())
+               cpu_relax();
+
+       t2 = ktime_get();
+       diff = ktime_to_us(ktime_sub(t2, t1));
+       if (diff > INT_MAX)
+               diff = INT_MAX;
+
+       ret = (int) diff;
+       return ret;
+}
+
+static void poll_idle_init(struct cpuidle_device *dev)
+{
+       struct cpuidle_state *state = &dev->states[0];
+
+       cpuidle_set_statedata(state, NULL);
+
+       snprintf(state->name, CPUIDLE_NAME_LEN, "POLL");
+       snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
+       state->exit_latency = 0;
+       state->target_residency = 0;
+       state->power_usage = -1;
+       state->flags = 0;
+       state->enter = poll_idle;
+}
+#else
+static void poll_idle_init(struct cpuidle_device *dev) {}
+#endif /* CONFIG_ARCH_HAS_CPU_RELAX */
+
 /**
  * cpuidle_enable_device - enables idle PM for a CPU
  * @dev: the CPU
@@ -185,6 +224,8 @@ int cpuidle_enable_device(struct cpuidle_device *dev)
                        return ret;
        }
 
+       poll_idle_init(dev);
+
        if ((ret = cpuidle_add_state_sysfs(dev)))
                return ret;
 
@@ -239,45 +280,6 @@ void cpuidle_disable_device(struct cpuidle_device *dev)
 
 EXPORT_SYMBOL_GPL(cpuidle_disable_device);
 
-#ifdef CONFIG_ARCH_HAS_CPU_RELAX
-static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st)
-{
-       ktime_t t1, t2;
-       s64 diff;
-       int ret;
-
-       t1 = ktime_get();
-       local_irq_enable();
-       while (!need_resched())
-               cpu_relax();
-
-       t2 = ktime_get();
-       diff = ktime_to_us(ktime_sub(t2, t1));
-       if (diff > INT_MAX)
-               diff = INT_MAX;
-
-       ret = (int) diff;
-       return ret;
-}
-
-static void poll_idle_init(struct cpuidle_device *dev)
-{
-       struct cpuidle_state *state = &dev->states[0];
-
-       cpuidle_set_statedata(state, NULL);
-
-       snprintf(state->name, CPUIDLE_NAME_LEN, "C0");
-       snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
-       state->exit_latency = 0;
-       state->target_residency = 0;
-       state->power_usage = -1;
-       state->flags = CPUIDLE_FLAG_POLL;
-       state->enter = poll_idle;
-}
-#else
-static void poll_idle_init(struct cpuidle_device *dev) {}
-#endif /* CONFIG_ARCH_HAS_CPU_RELAX */
-
 /**
  * __cpuidle_register_device - internal register function called before register
  * and enable routines
@@ -298,8 +300,6 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
 
        init_completion(&dev->kobj_unregister);
 
-       poll_idle_init(dev);
-
        /*
         * cpuidle driver should set the dev->power_specified bit
         * before registering the device if the driver provides
index 60fa6ec..7acb32e 100644 (file)
@@ -59,6 +59,8 @@
 #include <linux/hrtimer.h>     /* ktime_get_real() */
 #include <trace/events/power.h>
 #include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
 #include <asm/mwait.h>
 
 #define INTEL_IDLE_VERSION "0.4"
@@ -73,6 +75,7 @@ static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1;
 
 static unsigned int mwait_substates;
 
+#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
 /* Reliable LAPIC Timer States, bit 1 for C1 etc.  */
 static unsigned int lapic_timer_reliable_states = (1 << 1);     /* Default to only C1 */
 
@@ -82,6 +85,14 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state);
 static struct cpuidle_state *cpuidle_state_table;
 
 /*
+ * Set this flag for states where the HW flushes the TLB for us
+ * and so we don't need cross-calls to keep it consistent.
+ * If this flag is set, SW flushes the TLB, so even if the
+ * HW doesn't do the flushing, this flag is safe to use.
+ */
+#define CPUIDLE_FLAG_TLB_FLUSHED       0x10000
+
+/*
  * States are indexed by the cstate number,
  * which is also the index into the MWAIT hint array.
  * Thus C0 is a dummy.
@@ -122,7 +133,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
                .driver_data = (void *) 0x00,
                .flags = CPUIDLE_FLAG_TIME_VALID,
                .exit_latency = 1,
-               .target_residency = 4,
+               .target_residency = 1,
                .enter = &intel_idle },
        { /* MWAIT C2 */
                .name = "SNB-C3",
@@ -130,7 +141,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
                .driver_data = (void *) 0x10,
                .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 80,
-               .target_residency = 160,
+               .target_residency = 211,
                .enter = &intel_idle },
        { /* MWAIT C3 */
                .name = "SNB-C6",
@@ -138,7 +149,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
                .driver_data = (void *) 0x20,
                .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 104,
-               .target_residency = 208,
+               .target_residency = 345,
                .enter = &intel_idle },
        { /* MWAIT C4 */
                .name = "SNB-C7",
@@ -146,7 +157,7 @@ static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
                .driver_data = (void *) 0x30,
                .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 109,
-               .target_residency = 300,
+               .target_residency = 345,
                .enter = &intel_idle },
 };
 
@@ -241,6 +252,39 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
        return usec_delta;
 }
 
+static void __setup_broadcast_timer(void *arg)
+{
+       unsigned long reason = (unsigned long)arg;
+       int cpu = smp_processor_id();
+
+       reason = reason ?
+               CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
+
+       clockevents_notify(reason, &cpu);
+}
+
+static int __cpuinit setup_broadcast_cpuhp_notify(struct notifier_block *n,
+               unsigned long action, void *hcpu)
+{
+       int hotcpu = (unsigned long)hcpu;
+
+       switch (action & 0xf) {
+       case CPU_ONLINE:
+               smp_call_function_single(hotcpu, __setup_broadcast_timer,
+                       (void *)true, 1);
+               break;
+       case CPU_DOWN_PREPARE:
+               smp_call_function_single(hotcpu, __setup_broadcast_timer,
+                       (void *)false, 1);
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata setup_broadcast_notifier = {
+       .notifier_call = setup_broadcast_cpuhp_notify,
+};
+
 /*
  * intel_idle_probe()
  */
@@ -303,7 +347,11 @@ static int intel_idle_probe(void)
        }
 
        if (boot_cpu_has(X86_FEATURE_ARAT))     /* Always Reliable APIC Timer */
-               lapic_timer_reliable_states = 0xFFFFFFFF;
+               lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE;
+       else {
+               smp_call_function(__setup_broadcast_timer, (void *)true, 1);
+               register_cpu_notifier(&setup_broadcast_notifier);
+       }
 
        pr_debug(PREFIX "v" INTEL_IDLE_VERSION
                " model 0x%X\n", boot_cpu_data.x86_model);
@@ -401,6 +449,10 @@ static int __init intel_idle_init(void)
 {
        int retval;
 
+       /* Do not load intel_idle at all for now if idle= is passed */
+       if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+               return -ENODEV;
+
        retval = intel_idle_probe();
        if (retval)
                return retval;
@@ -426,6 +478,11 @@ static void __exit intel_idle_exit(void)
        intel_idle_cpuidle_devices_uninit();
        cpuidle_unregister_driver(&intel_idle_driver);
 
+       if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) {
+               smp_call_function(__setup_broadcast_timer, (void *)false, 1);
+               unregister_cpu_notifier(&setup_broadcast_notifier);
+       }
+
        return;
 }
 
index 1be416b..36719ea 100644 (file)
@@ -47,13 +47,7 @@ struct cpuidle_state {
 
 /* Idle State Flags */
 #define CPUIDLE_FLAG_TIME_VALID        (0x01) /* is residency time measurable? */
-#define CPUIDLE_FLAG_CHECK_BM  (0x02) /* BM activity will exit state */
-#define CPUIDLE_FLAG_POLL      (0x10) /* no latency, no savings */
-#define CPUIDLE_FLAG_SHALLOW   (0x20) /* low latency, minimal savings */
-#define CPUIDLE_FLAG_BALANCED  (0x40) /* medium latency, moderate savings */
-#define CPUIDLE_FLAG_DEEP      (0x80) /* high latency, large savings */
 #define CPUIDLE_FLAG_IGNORE    (0x100) /* ignore during this idle period */
-#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */
 
 #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)