ARM: tegra11x: residency time for different LP2 modes
/*
 * arch/arm/mach-tegra/cpuidle-t11x.c
 *
 * CPU idle driver for Tegra11x CPUs
 *
 * Copyright (c) 2012, NVIDIA Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/ratelimit.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/suspend.h>
#include <linux/tick.h>
#include <linux/clk.h>
#include <linux/cpu_pm.h>
#include <linux/module.h>

#include <asm/cacheflush.h>
#include <asm/hardware/gic.h>
#include <asm/localtimer.h>
#include <asm/suspend.h>
#include <asm/cputype.h>

#include <mach/iomap.h>
#include <mach/irqs.h>
#include <mach/hardware.h>

#include <trace/events/power.h>

#include "clock.h"
#include "cpuidle.h"
#include "dvfs.h"
#include "fuse.h"
#include "gic.h"
#include "pm.h"
#include "reset.h"
#include "sleep.h"
#include "timer.h"

#define CLK_RST_CONTROLLER_CPU_CMPLX_STATUS \
        (IO_ADDRESS(TEGRA_CLK_RESET_BASE) + 0x470)
#define PMC_POWERGATE_STATUS \
        (IO_ADDRESS(TEGRA_PMC_BASE) + 0x038)

#define ARCH_TIMER_CTRL_ENABLE          (1 << 0)
#define ARCH_TIMER_CTRL_IT_MASK         (1 << 1)

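/*
 * Note: ARCH_TIMER_CTRL_* mirror the low bits of the ARMv7 generic
 * timer CNTP_CTL register: bit 0 enables the timer, bit 1 masks its
 * interrupt. tegra_cpu_core_power_down() below checks both to decide
 * whether the CPU's own timer is a usable wake source.
 */
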
#ifdef CONFIG_SMP
static s64 tegra_cpu_wake_by_time[4] = {
        LLONG_MAX, LLONG_MAX, LLONG_MAX, LLONG_MAX };
#endif

static ulong cpu_power_gating_in_idle __read_mostly = 0x1f;
module_param(cpu_power_gating_in_idle, ulong, 0644);

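/*
 * Note: cpu_power_gating_in_idle is consumed as a cpumask (see
 * tegra11x_lp2_is_allowed()): bits 0-3 cover the fast-cluster CPUs and
 * bit 4 the LP CPU, so the default 0x1f allows power gating everywhere.
 * Illustrative usage, assuming the usual module parameter path for a
 * built-in object named cpuidle_t11x:
 *
 *	echo 0x0f > /sys/module/cpuidle_t11x/parameters/cpu_power_gating_in_idle
 *
 * would keep the LP CPU out of power-gated idle while leaving the fast
 * cluster unchanged.
 */
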
static bool slow_cluster_power_gating_noncpu __read_mostly;
module_param(slow_cluster_power_gating_noncpu, bool, 0644);

static uint fast_cluster_power_down_mode __read_mostly;
module_param(fast_cluster_power_down_mode, uint, 0644);

static struct clk *cpu_clk_for_dvfs;

static int lp2_exit_latencies[5];

static struct {
        unsigned int cpu_ready_count[5];
        unsigned int tear_down_count[5];
        unsigned long long cpu_wants_lp2_time[5];
        unsigned long long in_lp2_time[5];
        unsigned int lp2_count;
        unsigned int lp2_completed_count;
        unsigned int lp2_count_bin[32];
        unsigned int lp2_completed_count_bin[32];
        unsigned int lp2_int_count[NR_IRQS];
        unsigned int last_lp2_int_count[NR_IRQS];
} idle_stats;

static inline unsigned int time_to_bin(unsigned int time)
{
        return fls(time);
}

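/*
 * Worked example: fls() returns the position of the most significant
 * set bit, so time_to_bin() yields log2 histogram buckets. A 5 ms
 * request gives fls(5) = 3, i.e. bin 3, printed by the debugfs table
 * below as the "4 - 8 ms" row (bin b spans [1 << (b - 1), 1 << b) ms).
 */
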
static inline void tegra_irq_unmask(int irq)
{
        struct irq_data *data = irq_get_irq_data(irq);
        data->chip->irq_unmask(data);
}

static inline unsigned int cpu_number(unsigned int n)
{
        return is_lp_cluster() ? 4 : n;
}

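/*
 * Note: per-CPU statistics and exit latencies live in five-entry
 * arrays; slots 0-3 are the fast-cluster cores and slot 4 is the
 * LP-cluster core (the "cpulp" column in the debugfs output below).
 */
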
void tegra11x_cpu_idle_stats_lp2_ready(unsigned int cpu)
{
        idle_stats.cpu_ready_count[cpu_number(cpu)]++;
}

void tegra11x_cpu_idle_stats_lp2_time(unsigned int cpu, s64 us)
{
        idle_stats.cpu_wants_lp2_time[cpu_number(cpu)] += us;
}

/*
 * Allow rail off only if all secondary CPUs are power gated, and no
 * rail update is in progress.
 */
static bool tegra_rail_off_is_allowed(void)
{
        u32 rst = readl(CLK_RST_CONTROLLER_CPU_CMPLX_STATUS);
        u32 pg = readl(PMC_POWERGATE_STATUS) >> 8;

        if (((rst & 0xE) != 0xE) || ((pg & 0xE) != 0))
                return false;

        if (tegra_dvfs_rail_updating(cpu_clk_for_dvfs))
                return false;

        return true;
}

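/*
 * Note (inferred from the register usage above): mask 0xE selects bits
 * 1-3, i.e. CPU1-CPU3 in CPU_CMPLX_STATUS, and after the >> 8 shift the
 * POWERGATE_STATUS bits appear to line up with the CPU1-CPU3 power
 * partitions, so rail off requires every secondary core to be both held
 * in reset and power gated.
 */
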
bool tegra11x_lp2_is_allowed(struct cpuidle_device *dev,
        struct cpuidle_state *state)
{
        s64 request;

        if (!cpumask_test_cpu(cpu_number(dev->cpu),
                                to_cpumask(&cpu_power_gating_in_idle)))
                return false;

        request = ktime_to_us(tick_nohz_get_sleep_length());
        if (state->exit_latency != lp2_exit_latencies[cpu_number(dev->cpu)]) {
                /* possible on the 1st entry after a cluster switch */
                state->exit_latency = lp2_exit_latencies[cpu_number(dev->cpu)];
                tegra_lp2_update_target_residency(state);
        }
        if (request < state->target_residency) {
                /* Not enough time left to enter LP2 */
                return false;
        }

        return true;
}

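/*
 * Illustrative sketch (not part of this file): the platform cpuidle
 * glue is expected to gate LP2 entry on the check above, roughly:
 *
 *	if (tegra11x_lp2_is_allowed(dev, state))
 *		entered = tegra11x_idle_lp2(dev, state);
 *	else
 *		tegra_cpu_wfi();
 *
 * Refreshing state->exit_latency here covers the first idle pass after
 * a cluster switch, when the cached per-cluster latency and the
 * governor-visible state can disagree.
 */
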
static inline void tegra11_lp2_restore_affinity(void)
{
#ifdef CONFIG_SMP
        /* Disable the distributor. */
        tegra_gic_dist_disable();

        /* Restore the other CPUs' interrupt affinity. */
        tegra_gic_restore_affinity();

        /* Re-enable the distributor. */
        tegra_gic_dist_enable();
#endif
}

static bool tegra_cpu_cluster_power_down(struct cpuidle_device *dev,
                           struct cpuidle_state *state, s64 request)
{
        ktime_t entry_time;
        ktime_t exit_time;
        bool sleep_completed = false;
        bool multi_cpu_entry = false;
        int bin;
        unsigned int flag = 0;
        s64 sleep_time;

        /* LP2 entry time */
        entry_time = ktime_get();

        if (request < state->target_residency) {
                /* Not enough time left to enter LP2 */
                tegra_cpu_wfi();
                return false;
        }

#ifdef CONFIG_SMP
        multi_cpu_entry = !is_lp_cluster() && (num_online_cpus() > 1);
        if (multi_cpu_entry) {
                s64 wake_time;
                unsigned int i;

                /* Disable the distributor -- this is the only way to
                   prevent the other CPUs from responding to interrupts
                   and potentially fiddling with the distributor
                   registers while we're fiddling with them. */
                tegra_gic_dist_disable();

                /* Did an interrupt come in for another CPU before we
                   could disable the distributor? */
                if (!tegra_rail_off_is_allowed()) {
                        /* Yes; re-enable the distributor and fall back
                           to LP3. */
                        tegra_gic_dist_enable();
                        tegra_cpu_wfi();
                        return false;
                }

                /* LP2 initial targeted wake time */
                wake_time = ktime_to_us(entry_time) + request;

                /* CPU0 must wake up before any of the other CPUs. */
                smp_rmb();
                for (i = 1; i < CONFIG_NR_CPUS; i++)
                        wake_time = min_t(s64, wake_time,
                                tegra_cpu_wake_by_time[i]);

                /* LP2 actual targeted wake time */
                request = wake_time - ktime_to_us(entry_time);
                BUG_ON(wake_time < 0LL);

                if (request < state->target_residency) {
                        /* Not enough time left to enter LP2 */
                        tegra_gic_dist_enable();
                        tegra_cpu_wfi();
                        return false;
                }

                /* Cancel the LP2 wake timers for all secondary CPUs. */
                tegra_lp2_timer_cancel_secondary();

                /* Save and disable the affinity setting for the other
                   CPUs and route all interrupts to CPU0. */
                tegra_gic_disable_affinity();

                /* Re-enable the distributor. */
                tegra_gic_dist_enable();
        }
#endif
        cpu_pm_enter();

        sleep_time = request -
                lp2_exit_latencies[cpu_number(dev->cpu)];

        bin = time_to_bin((u32)request / 1000);
        idle_stats.tear_down_count[cpu_number(dev->cpu)]++;
        idle_stats.lp2_count++;
        idle_stats.lp2_count_bin[bin]++;

        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
        if (is_lp_cluster()) {
                /* Emulation mode is not supported here, for now. */
                flag = TEGRA_POWER_CLUSTER_PART_NONCPU;
        } else {
                tegra_dvfs_rail_off(tegra_cpu_rail, entry_time);
                flag = (fast_cluster_power_down_mode
                        << TEGRA_POWER_CLUSTER_PART_SHIFT)
                        & TEGRA_POWER_CLUSTER_PART_MASK;
                if ((request < tegra_min_residency_crail()) &&
                        (flag != TEGRA_POWER_CLUSTER_PART_MASK))
                        flag = TEGRA_POWER_CLUSTER_PART_NONCPU;
        }

        if (tegra_idle_lp2_last(sleep_time, flag) == 0)
                sleep_completed = true;
        else {
                int irq = tegra_gic_pending_interrupt();
                idle_stats.lp2_int_count[irq]++;
        }

        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
        exit_time = ktime_get();
        if (!is_lp_cluster())
                tegra_dvfs_rail_on(tegra_cpu_rail, exit_time);

        idle_stats.in_lp2_time[cpu_number(dev->cpu)] +=
                ktime_to_us(ktime_sub(exit_time, entry_time));

        if (multi_cpu_entry)
                tegra11_lp2_restore_affinity();

        if (sleep_completed) {
                /*
                 * Stayed in LP2 for the full time until the next tick;
                 * adjust the exit latency based on the measurement.
                 */
                int offset = ktime_to_us(ktime_sub(exit_time, entry_time))
                        - request;
                int latency = lp2_exit_latencies[cpu_number(dev->cpu)] +
                        offset / 16;
                latency = clamp(latency, 0, 10000);
                lp2_exit_latencies[cpu_number(dev->cpu)] = latency;
                state->exit_latency = latency;          /* for idle governor */
                smp_wmb();

                idle_stats.lp2_completed_count++;
                idle_stats.lp2_completed_count_bin[bin]++;

                pr_debug("%lld %lld %d %d\n", request,
                        ktime_to_us(ktime_sub(exit_time, entry_time)),
                        offset, bin);
        }

        cpu_pm_exit();

        return true;
}

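/*
 * Worked example for the exit-latency adaptation above: the measured
 * overshoot (offset = time actually spent minus time requested) is fed
 * back with weight 1/16, a simple first-order low-pass filter. With a
 * cached latency of 1000 us, a 2000 us request and 2160 us actually
 * spent, offset = 160 us and the new latency is 1000 + 160 / 16 =
 * 1010 us, clamped to [0, 10000] us.
 */
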
static bool tegra_cpu_core_power_down(struct cpuidle_device *dev,
                           struct cpuidle_state *state, s64 request)
{
#ifdef CONFIG_SMP
        s64 sleep_time;
        ktime_t entry_time;
        struct arch_timer_context timer_context;
        bool sleep_completed = false;
        struct tick_sched *ts = tick_get_tick_sched(dev->cpu);

        if (!arch_timer_get_state(&timer_context)) {
                /* The CPU timer is a usable wake source only if it is
                   enabled and its interrupt is not masked. */
                if ((timer_context.cntp_ctl & ARCH_TIMER_CTRL_ENABLE) &&
                    !(timer_context.cntp_ctl & ARCH_TIMER_CTRL_IT_MASK)) {
                        if (timer_context.cntp_tval <= 0) {
                                tegra_cpu_wfi();
                                return false;
                        }
                        /* Convert the remaining timer ticks to microseconds. */
                        request = div_u64((u64)timer_context.cntp_tval *
                                        1000000, timer_context.cntfrq);
#ifdef CONFIG_TEGRA_LP2_CPU_TIMER
                        if (request >= state->target_residency) {
                                /* Rewrite CNTP_TVAL so the timer fires early
                                   by the exit latency and the wakeup is not
                                   delivered late. */
                                timer_context.cntp_tval -= state->exit_latency *
                                        (timer_context.cntfrq / 1000000);
                                __asm__("mcr p15, 0, %0, c14, c2, 0\n"
                                        :
                                        :
                                        "r"(timer_context.cntp_tval));
                        }
#endif
                }
        }

        if (!tegra_is_lp2_timer_ready(dev->cpu) ||
            (request < state->target_residency) ||
            (!ts) || (ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
                /*
                 * Not enough time left to enter LP2, or wake timer not ready
                 */
                tegra_cpu_wfi();
                return false;
        }

        cpu_pm_enter();

#if !defined(CONFIG_TEGRA_LP2_CPU_TIMER)
        sleep_time = request - state->exit_latency;
        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
        arch_timer_suspend(&timer_context);
        tegra_lp2_set_trigger(sleep_time);
#endif
        idle_stats.tear_down_count[cpu_number(dev->cpu)]++;

        entry_time = ktime_get();

        /* Save the time this CPU must be awakened by. */
        tegra_cpu_wake_by_time[dev->cpu] = ktime_to_us(entry_time) + request;
        smp_wmb();

        cpu_suspend(0, tegra3_sleep_cpu_secondary_finish);

        tegra_cpu_wake_by_time[dev->cpu] = LLONG_MAX;

#ifdef CONFIG_TEGRA_LP2_CPU_TIMER
        if (!arch_timer_get_state(&timer_context))
                sleep_completed = (timer_context.cntp_tval <= 0);
#else
        sleep_completed = !tegra_lp2_timer_remain();
        tegra_lp2_set_trigger(0);
        arch_timer_resume(&timer_context);
        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
#endif
        sleep_time = ktime_to_us(ktime_sub(ktime_get(), entry_time));
        idle_stats.in_lp2_time[cpu_number(dev->cpu)] += sleep_time;
        if (sleep_completed) {
                /*
                 * Stayed in LP2 for the full time until the timer expired;
                 * adjust the exit latency based on the measurement.
                 */
                int offset = sleep_time - request;
                int latency = lp2_exit_latencies[cpu_number(dev->cpu)] +
                        offset / 16;
                latency = clamp(latency, 0, 10000);
                lp2_exit_latencies[cpu_number(dev->cpu)] = latency;
                state->exit_latency = latency;          /* for idle governor */
                smp_wmb();
        }
#endif
        cpu_pm_exit();

        return true;
}

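/*
 * Note on the two wake paths above: with CONFIG_TEGRA_LP2_CPU_TIMER the
 * CPU's architected timer is assumed to keep counting through power
 * gating, so it is only reprogrammed to fire early by the exit latency.
 * Without it, the timer state is saved with arch_timer_suspend(), the
 * external LP2 wake timer is armed via tegra_lp2_set_trigger(), and the
 * tick is handed to the broadcast clockevent for the duration of the
 * power-gated state.
 */
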
bool tegra11x_idle_lp2(struct cpuidle_device *dev,
                           struct cpuidle_state *state)
{
        bool entered_lp2;
        bool cpu_gating_only = false;
        bool power_gating_cpu_only = true;
        s64 request = ktime_to_us(tick_nohz_get_sleep_length());

        tegra_set_cpu_in_lp2(dev->cpu);
        cpu_gating_only = (((fast_cluster_power_down_mode
                        << TEGRA_POWER_CLUSTER_PART_SHIFT)
                        & TEGRA_POWER_CLUSTER_PART_MASK) == 0);

        if (is_lp_cluster()) {
                if (slow_cluster_power_gating_noncpu &&
                    (request > tegra_min_residency_noncpu()))
                        power_gating_cpu_only = false;
                else
                        power_gating_cpu_only = true;
        } else if (!cpu_gating_only &&
                   (num_online_cpus() == 1) &&
                   tegra_rail_off_is_allowed() &&
                   (request > tegra_min_residency_noncpu()))
                power_gating_cpu_only = false;
        else
                power_gating_cpu_only = true;

        if (power_gating_cpu_only)
                entered_lp2 = tegra_cpu_core_power_down(dev, state, request);
        else
                entered_lp2 = tegra_cpu_cluster_power_down(dev, state, request);

        tegra_clear_cpu_in_lp2(dev->cpu);

        return entered_lp2;
}

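/*
 * Summary of the dispatch above: cluster-level power down
 * (tegra_cpu_cluster_power_down) is chosen only when the predicted idle
 * time exceeds tegra_min_residency_noncpu() and, on the fast cluster,
 * this CPU is the last one online, rail off is allowed, and
 * fast_cluster_power_down_mode gates more than the CPU partition alone;
 * on the LP cluster it additionally requires the
 * slow_cluster_power_gating_noncpu module parameter. All other cases
 * fall back to per-core power gating (tegra_cpu_core_power_down).
 */
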
int tegra11x_cpuidle_init_soc(void)
{
        int i;

        cpu_clk_for_dvfs = tegra_get_clock_by_name("cpu_g");

        for (i = 0; i < ARRAY_SIZE(lp2_exit_latencies); i++)
                lp2_exit_latencies[i] = tegra_lp2_exit_latency;

        return 0;
}

#ifdef CONFIG_DEBUG_FS
int tegra11x_lp2_debug_show(struct seq_file *s, void *data)
{
        int bin;
        int i;
        seq_printf(s, "                                    cpu0     cpu1     cpu2     cpu3     cpulp\n");
        seq_printf(s, "-----------------------------------------------------------------------------\n");
        seq_printf(s, "cpu ready:                      %8u %8u %8u %8u %8u\n",
                idle_stats.cpu_ready_count[0],
                idle_stats.cpu_ready_count[1],
                idle_stats.cpu_ready_count[2],
                idle_stats.cpu_ready_count[3],
                idle_stats.cpu_ready_count[4]);
        seq_printf(s, "tear down:                      %8u %8u %8u %8u %8u\n",
                idle_stats.tear_down_count[0],
                idle_stats.tear_down_count[1],
                idle_stats.tear_down_count[2],
                idle_stats.tear_down_count[3],
                idle_stats.tear_down_count[4]);
        seq_printf(s, "lp2:            %8u\n", idle_stats.lp2_count);
        seq_printf(s, "lp2 completed:  %8u %7u%%\n",
                idle_stats.lp2_completed_count,
                idle_stats.lp2_completed_count * 100 /
                        (idle_stats.lp2_count ?: 1));

        seq_printf(s, "\n");
        seq_printf(s, "cpu ready time:                 %8llu %8llu %8llu %8llu %8llu ms\n",
                div64_u64(idle_stats.cpu_wants_lp2_time[0], 1000),
                div64_u64(idle_stats.cpu_wants_lp2_time[1], 1000),
                div64_u64(idle_stats.cpu_wants_lp2_time[2], 1000),
                div64_u64(idle_stats.cpu_wants_lp2_time[3], 1000),
                div64_u64(idle_stats.cpu_wants_lp2_time[4], 1000));

        seq_printf(s, "lp2 time:                       %8llu %8llu %8llu %8llu %8llu ms\n",
                div64_u64(idle_stats.in_lp2_time[0], 1000),
                div64_u64(idle_stats.in_lp2_time[1], 1000),
                div64_u64(idle_stats.in_lp2_time[2], 1000),
                div64_u64(idle_stats.in_lp2_time[3], 1000),
                div64_u64(idle_stats.in_lp2_time[4], 1000));

        seq_printf(s, "lp2 %%:                         %7d%% %7d%% %7d%% %7d%% %7d%%\n",
                (int)(idle_stats.cpu_wants_lp2_time[0] ?
                        div64_u64(idle_stats.in_lp2_time[0] * 100,
                        idle_stats.cpu_wants_lp2_time[0]) : 0),
                (int)(idle_stats.cpu_wants_lp2_time[1] ?
                        div64_u64(idle_stats.in_lp2_time[1] * 100,
                        idle_stats.cpu_wants_lp2_time[1]) : 0),
                (int)(idle_stats.cpu_wants_lp2_time[2] ?
                        div64_u64(idle_stats.in_lp2_time[2] * 100,
                        idle_stats.cpu_wants_lp2_time[2]) : 0),
                (int)(idle_stats.cpu_wants_lp2_time[3] ?
                        div64_u64(idle_stats.in_lp2_time[3] * 100,
                        idle_stats.cpu_wants_lp2_time[3]) : 0),
                (int)(idle_stats.cpu_wants_lp2_time[4] ?
                        div64_u64(idle_stats.in_lp2_time[4] * 100,
                        idle_stats.cpu_wants_lp2_time[4]) : 0));
        seq_printf(s, "\n");

        seq_printf(s, "%19s %8s %8s %8s\n", "", "lp2", "comp", "%");
        seq_printf(s, "-------------------------------------------------\n");
        for (bin = 0; bin < 32; bin++) {
                if (idle_stats.lp2_count_bin[bin] == 0)
                        continue;
                /* Bin 0 covers requests under 1 ms. */
                seq_printf(s, "%6u - %6u ms: %8u %8u %7u%%\n",
                        bin ? 1 << (bin - 1) : 0, 1 << bin,
                        idle_stats.lp2_count_bin[bin],
                        idle_stats.lp2_completed_count_bin[bin],
                        idle_stats.lp2_completed_count_bin[bin] * 100 /
                                idle_stats.lp2_count_bin[bin]);
        }

        seq_printf(s, "\n");
        seq_printf(s, "%3s %20s %6s %10s\n",
                "int", "name", "count", "last count");
        seq_printf(s, "--------------------------------------------\n");
        for (i = 0; i < NR_IRQS; i++) {
                if (idle_stats.lp2_int_count[i] == 0)
                        continue;
                seq_printf(s, "%3d %20s %6d %10d\n",
                        i, irq_to_desc(i)->action ?
                                irq_to_desc(i)->action->name ?: "???" : "???",
                        idle_stats.lp2_int_count[i],
                        idle_stats.lp2_int_count[i] -
                                idle_stats.last_lp2_int_count[i]);
                idle_stats.last_lp2_int_count[i] = idle_stats.lp2_int_count[i];
        }
        return 0;
}
#endif