ARM: tegra: power: Power off multiple CPUs on-line
Alex Frid [Fri, 3 Feb 2012 00:21:02 +0000 (16:21 -0800)]
Currently on Tegra3 cpu complex is powered off in idle (enters CPU0
LP2 state) only if all secondary CPUs are off-line. This commit adds
an option for CPU0 to enter LP2 while secondary CPUs are still on-line
but have been power gated and entered LP2 state by themselves.

The critical race - a secondary CPU waking up from LP2 while CPU0 is
turning the common CPU rail off - is addressed as follows.

1. When entering LP2 state on CPU0:
a) disable GIC distributor
b) check that CPU1-3 are all power-gated (i.e., either off-lined or
have entered LP2)
c) if (b) passes - set all interrupts affinity to CPU0, then
re-enable distributor and continue with CPU complex powering off
d) if (b) fails - re-enable distributor and enter clock-gated (LP3)
state on CPU0
This procedure prevents waking secondary CPUs by GIC SPIs.

2. We still need to make sure that no CPU1-3 PPIs from legacy IRQ/FIQ
or private timers would happen. This is achieved by disabling timers
and legacy interrupts if CPU1-3 enters LP2 state with external timers
selected as wake sources. Respectively, establish dependency between
turning rail off and LP2 wake timers configuration options.

3. Finally, no IPIs are sent by CPU0 entering LP2.

There are no special changes in wake up procedures - whenever CPU0
is awakened by external interrupt or wake timer, cpu complex is powered
on by h/w, and secondary CPUs that were in LP2 state are ungated by
the same interrupt (off-line CPUs are kept power gated). Hence, there
is no need for CPU1-3 external wake timers to run while the rail is
off, and these timers are stopped. To make sure that none of secondary
CPUs over-sleeps its LP2 time, CPU0 wake timer is set to minimum sleep
interval of all CPUs.

By default, the configuration option for powering off multiple on-line
CPUs is disabled on Tegra3.

Change-Id: I4920d0df375536b2b8ebd9e6738c5fe4f92b92a0
Signed-off-by: Alex Frid <afrid@nvidia.com>
Reviewed-on: http://git-master/r/83547
Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>

Rebase-Id: Rc518e98ddce9152f0eeb086a49c31e1c252fe9eb

arch/arm/mach-tegra/Kconfig
arch/arm/mach-tegra/cpuidle-t3.c
arch/arm/mach-tegra/pm.h
arch/arm/mach-tegra/timer-t3.c

index 0d752e2..5feeef4 100644 (file)
@@ -50,7 +50,7 @@ config ARCH_TEGRA_3x_SOC
        select PINCTRL_TEGRA30
        select PL310_ERRATA_769419 if CACHE_L2X0
        select REPORT_PRESENT_CPUS if TEGRA_AUTO_HOTPLUG
-       select TEGRA_LP2_ARM_TWD if HAVE_ARM_TWD
+       select TEGRA_LP2_ARM_TWD if HAVE_ARM_TWD && !TEGRA_RAIL_OFF_MULTIPLE_CPUS
        select USB_ARCH_HAS_EHCI if USB_SUPPORT
        select USB_ULPI if USB_PHY
        select USB_ULPI_VIEWPORT if USB_PHY
@@ -249,6 +249,9 @@ config TEGRA_WDT_RECOVERY
 
 config TEGRA_LP2_ARM_TWD
        bool
+
+config TEGRA_RAIL_OFF_MULTIPLE_CPUS
+       bool
 endif
 
 config TEGRA_SLOW_CSITE
index 665e102..fbd39da 100644 (file)
@@ -63,6 +63,8 @@
 
 #define CLK_RST_CONTROLLER_CPU_CMPLX_STATUS \
        (IO_ADDRESS(TEGRA_CLK_RESET_BASE) + 0x470)
+#define PMC_POWERGATE_STATUS \
+       (IO_ADDRESS(TEGRA_PMC_BASE) + 0x038)
 
 #ifdef CONFIG_SMP
 static s64 tegra_cpu_wake_by_time[4] = {
@@ -119,6 +121,22 @@ void tegra3_cpu_idle_stats_lp2_time(unsigned int cpu, s64 us)
        idle_stats.cpu_wants_lp2_time[cpu_number(cpu)] += us;
 }
 
+/* Allow rail off only if all secondary CPUs are power gated, and no
+   rail update is in progress */
+static bool tegra3_rail_off_is_allowed(void)
+{
+       u32 rst = readl(CLK_RST_CONTROLLER_CPU_CMPLX_STATUS);
+       u32 pg = readl(PMC_POWERGATE_STATUS) >> 8;
+
+       if (((rst & 0xE) != 0xE) || ((pg & 0xE) != 0))
+               return false;
+
+       if (tegra_dvfs_rail_updating(cpu_clk_for_dvfs))
+               return false;
+
+       return true;
+}
+
 bool tegra3_lp2_is_allowed(struct cpuidle_device *dev,
        struct cpuidle_state *state)
 {
@@ -137,20 +155,15 @@ bool tegra3_lp2_is_allowed(struct cpuidle_device *dev,
                num_online_cpus() > 1)
                return false;
 
+#ifndef CONFIG_TEGRA_RAIL_OFF_MULTIPLE_CPUS
        /* FIXME: All CPU's entering LP2 is not working.
         * Don't let CPU0 enter LP2 when any secondary CPU is online.
         */
        if ((dev->cpu == 0) && (num_online_cpus() > 1))
                return false;
-
-       if (dev->cpu == 0) {
-               u32 reg = readl(CLK_RST_CONTROLLER_CPU_CMPLX_STATUS);
-               if ((reg & 0xE) != 0xE)
-                       return false;
-
-               if (tegra_dvfs_rail_updating(cpu_clk_for_dvfs))
-                       return false;
-       }
+#endif
+       if ((dev->cpu == 0)  && (!tegra3_rail_off_is_allowed()))
+               return false;
 
        request = ktime_to_us(tick_nohz_get_sleep_length());
        if (state->exit_latency != lp2_exit_latencies[cpu_number(dev->cpu)]) {
@@ -173,13 +186,29 @@ static inline void tegra3_lp3_fall_back(struct cpuidle_device *dev)
        dev->last_state = &dev->states[0];
 }
 
+static inline void tegra3_lp2_restore_affinity(void)
+{
+#ifdef CONFIG_SMP
+       /* Disable the distributor. */
+       tegra_gic_dist_disable();
+
+       /* Restore the other CPU's interrupt affinity. */
+       tegra_gic_restore_affinity();
+
+       /* Re-enable the distributor. */
+       tegra_gic_dist_enable();
+#endif
+}
+
 static void tegra3_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
                           struct cpuidle_state *state, s64 request)
 {
        ktime_t entry_time;
        ktime_t exit_time;
        bool sleep_completed = false;
+       bool multi_cpu_entry = false;
        int bin;
+       s64 sleep_time;
 
        /* LP2 entry time */
        entry_time = ktime_get();
@@ -191,7 +220,8 @@ static void tegra3_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
        }
 
 #ifdef CONFIG_SMP
-       if (!is_lp_cluster() && (num_online_cpus() > 1)) {
+       multi_cpu_entry = !is_lp_cluster() && (num_online_cpus() > 1);
+       if (multi_cpu_entry) {
                s64 wake_time;
                unsigned int i;
 
@@ -203,20 +233,13 @@ static void tegra3_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
 
                /* Did an interrupt come in for another CPU before we
                   could disable the distributor? */
-               if (!tegra3_lp2_is_allowed(dev, state)) {
+               if (!tegra3_rail_off_is_allowed()) {
                        /* Yes, re-enable the distributor and LP3. */
                        tegra_gic_dist_enable();
                        tegra3_lp3_fall_back(dev);
                        return;
                }
 
-               /* Save and disable the affinity setting for the other
-                  CPUs and route all interrupts to CPU0. */
-               tegra_gic_disable_affinity();
-
-               /* Re-enable the distributor. */
-               tegra_gic_dist_enable();
-
                /* LP2 initial targeted wake time */
                wake_time = ktime_to_us(entry_time) + request;
 
@@ -229,54 +252,56 @@ static void tegra3_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
                /* LP2 actual targeted wake time */
                request = wake_time - ktime_to_us(entry_time);
                BUG_ON(wake_time < 0LL);
-       }
-#endif
 
-       if (request > state->target_residency) {
-               s64 sleep_time = request -
-                       lp2_exit_latencies[cpu_number(dev->cpu)];
-
-               bin = time_to_bin((u32)request / 1000);
-               idle_stats.tear_down_count[cpu_number(dev->cpu)]++;
-               idle_stats.lp2_count++;
-               idle_stats.lp2_count_bin[bin]++;
-
-               trace_power_start(POWER_CSTATE, 2, dev->cpu);
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
-               if (!is_lp_cluster())
-                       tegra_dvfs_rail_off(tegra_cpu_rail, entry_time);
-
-               if (tegra_idle_lp2_last(sleep_time, 0) == 0)
-                       sleep_completed = true;
-               else {
-                       int irq = tegra_gic_pending_interrupt();
-                       idle_stats.lp2_int_count[irq]++;
+               if (request < state->target_residency) {
+                       /* Not enough time left to enter LP2 */
+                       tegra_gic_dist_enable();
+                       tegra3_lp3_fall_back(dev);
+                       return;
                }
 
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
-               exit_time = ktime_get();
-               if (!is_lp_cluster())
-                       tegra_dvfs_rail_on(tegra_cpu_rail, exit_time);
-               idle_stats.in_lp2_time[cpu_number(dev->cpu)] +=
-                       ktime_to_us(ktime_sub(exit_time, entry_time));
-       } else
-               exit_time = ktime_get();
-
-
-#ifdef CONFIG_SMP
-       if (!is_lp_cluster() && (num_online_cpus() > 1)) {
+               /* Cancel LP2 wake timers for all secondary CPUs */
+               tegra_lp2_timer_cancel_secondary();
 
-               /* Disable the distributor. */
-               tegra_gic_dist_disable();
-
-               /* Restore the other CPU's interrupt affinity. */
-               tegra_gic_restore_affinity();
+               /* Save and disable the affinity setting for the other
+                  CPUs and route all interrupts to CPU0. */
+               tegra_gic_disable_affinity();
 
                /* Re-enable the distributor. */
                tegra_gic_dist_enable();
        }
 #endif
 
+       sleep_time = request -
+               lp2_exit_latencies[cpu_number(dev->cpu)];
+
+       bin = time_to_bin((u32)request / 1000);
+       idle_stats.tear_down_count[cpu_number(dev->cpu)]++;
+       idle_stats.lp2_count++;
+       idle_stats.lp2_count_bin[bin]++;
+
+       trace_power_start(POWER_CSTATE, 2, dev->cpu);
+       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       if (!is_lp_cluster())
+               tegra_dvfs_rail_off(tegra_cpu_rail, entry_time);
+
+       if (tegra_idle_lp2_last(sleep_time, 0) == 0)
+               sleep_completed = true;
+       else {
+               int irq = tegra_gic_pending_interrupt();
+               idle_stats.lp2_int_count[irq]++;
+       }
+
+       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       exit_time = ktime_get();
+       if (!is_lp_cluster())
+               tegra_dvfs_rail_on(tegra_cpu_rail, exit_time);
+       idle_stats.in_lp2_time[cpu_number(dev->cpu)] +=
+               ktime_to_us(ktime_sub(exit_time, entry_time));
+
+       if (multi_cpu_entry)
+               tegra3_lp2_restore_affinity();
+
        if (sleep_completed) {
                /*
                 * Stayed in LP2 for the full time until the next tick,
@@ -391,9 +416,12 @@ void tegra3_idle_lp2(struct cpuidle_device *dev,
 
        cpu_pm_enter();
 
-       if (last_cpu && (dev->cpu == 0))
-               tegra3_idle_enter_lp2_cpu_0(dev, state, request);
-       else
+       if (dev->cpu == 0) {
+               if (last_cpu)
+                       tegra3_idle_enter_lp2_cpu_0(dev, state, request);
+               else
+                       tegra3_lp3_fall_back(dev);
+       } else
                tegra3_idle_enter_lp2_cpu_n(dev, state, request);
 
        cpu_pm_exit();
index 982d2ed..0144a3c 100644 (file)
@@ -151,6 +151,7 @@ unsigned long tegra2_lp2_timer_remain(void);
 void tegra3_lp2_set_trigger(unsigned long cycles);
 unsigned long tegra3_lp2_timer_remain(void);
 int tegra3_is_lp2_timer_ready(unsigned int cpu);
+void tegra3_lp2_timer_cancel_secondary(void);
 #endif
 
 static inline void tegra_lp0_suspend_init(void)
@@ -189,6 +190,13 @@ static inline int tegra_is_lp2_timer_ready(unsigned int cpu)
 #endif
 }
 
+static inline void tegra_lp2_timer_cancel_secondary(void)
+{
+#ifndef CONFIG_ARCH_TEGRA_2x_SOC
+       tegra3_lp2_timer_cancel_secondary();
+#endif
+}
+
 #if DEBUG_CLUSTER_SWITCH && 0 /* !!!FIXME!!! THIS IS BROKEN */
 extern unsigned int tegra_cluster_debug;
 #define DEBUG_CLUSTER(x) do { if (tegra_cluster_debug) printk x; } while (0)
index b82a1be..0f42cdc 100644 (file)
@@ -71,6 +71,7 @@
 
 static void __iomem *timer_reg_base = IO_ADDRESS(TEGRA_TMR1_BASE);
 static cpumask_t wake_timer_ready;
+static cpumask_t wake_timer_canceled;
 
 #define timer_writel(value, reg) \
        __raw_writel(value, timer_reg_base + (reg))
@@ -218,6 +219,9 @@ unsigned long tegra3_lp2_timer_remain(void)
 {
        int cpu = cpu_number();
 
+       if (cpumask_test_and_clear_cpu(cpu, &wake_timer_canceled))
+               return -ETIME;
+
        return timer_readl(lp2_wake_timers[cpu] + TIMER_PCR) & 0x1ffffffful;
 }
 
@@ -225,6 +229,19 @@ int tegra3_is_lp2_timer_ready(unsigned int cpu)
 {
        return cpumask_test_cpu(cpu, &wake_timer_ready);
 }
+
+void tegra3_lp2_timer_cancel_secondary(void)
+{
+       int cpu;
+       int base;
+
+       for (cpu = 1; cpu < ARRAY_SIZE(lp2_wake_timers); cpu++) {
+               base = lp2_wake_timers[cpu];
+               cpumask_set_cpu(cpu, &wake_timer_canceled);
+               timer_writel(0, base + TIMER_PTV);
+               timer_writel(1<<30, base + TIMER_PCR);
+       }
+}
 #endif
 
 void __init tegra3_init_timer(u32 *offset, int *irq, unsigned long rate)