/*
 * arch/arm/mach-tegra/cpuidle-t3.c
 *
 * CPU idle driver for Tegra3 CPUs
 *
 * Copyright (c) 2010-2011, NVIDIA Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/ratelimit.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/suspend.h>
#include <linux/tick.h>
#include <linux/cpu_pm.h>
#include <linux/clk.h>
#include <linux/moduleparam.h>

#include <asm/cacheflush.h>
#include <asm/hardware/gic.h>
#include <asm/localtimer.h>
#include <asm/smp_twd.h>

#include <mach/iomap.h>
#include <mach/irqs.h>

#include <trace/events/power.h>

#include "clock.h"
#include "cpuidle.h"
#include "dvfs.h"
#include "fuse.h"
#include "gic.h"
#include "pm.h"
#include "reset.h"
#include "sleep.h"
#include "timer.h"

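/*
 * CPU complex status register in the clock-and-reset controller.  The
 * 0xE mask applied below suggests bits 1-3 report whether CPU1-CPU3 are
 * power gated; see tegra3_lp2_is_allowed().
 */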
#define CLK_RST_CONTROLLER_CPU_CMPLX_STATUS \
        (IO_ADDRESS(TEGRA_CLK_RESET_BASE) + 0x470)

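/*
 * Absolute time (in microseconds) by which each CPU must be woken;
 * LLONG_MAX means "no pending wake deadline".  Secondary CPUs publish
 * their deadline before power gating, and CPU0 reads these entries to
 * clamp the cluster wake time.
 */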
#ifdef CONFIG_SMP
static s64 tegra_cpu_wake_by_time[4] = {
        LLONG_MAX, LLONG_MAX, LLONG_MAX, LLONG_MAX };
#endif

static bool lp2_0_in_idle = true;
module_param(lp2_0_in_idle, bool, 0644);

static bool lp2_n_in_idle = true;
module_param(lp2_n_in_idle, bool, 0644);

static struct clk *cpu_clk_for_dvfs;
static struct clk *twd_clk;

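/*
 * Per-CPU LP2 exit latency in microseconds (slot 4 is the LP cluster).
 * Seeded from tegra_lp2_exit_latency at init and adapted at runtime
 * from measured wakeups.
 */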
static int lp2_exit_latencies[5];

static struct {
        unsigned int cpu_ready_count[5];
        unsigned int tear_down_count[5];
        unsigned long long cpu_wants_lp2_time[5];
        unsigned long long in_lp2_time[5];
        unsigned int lp2_count;
        unsigned int lp2_completed_count;
        unsigned int lp2_count_bin[32];
        unsigned int lp2_completed_count_bin[32];
        unsigned int lp2_int_count[NR_IRQS];
        unsigned int last_lp2_int_count[NR_IRQS];
} idle_stats;

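/*
 * Histogram bin for a duration in milliseconds: fls() yields bin n for
 * durations in [2^(n-1), 2^n) ms, matching the ranges printed by
 * tegra3_lp2_debug_show().
 */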
static inline unsigned int time_to_bin(unsigned int time)
{
        return fls(time);
}

static inline void tegra_irq_unmask(int irq)
{
        struct irq_data *data = irq_get_irq_data(irq);
        data->chip->irq_unmask(data);
}

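/*
 * Statistics slot for a CPU: G-cluster CPUs use their own index, while
 * the single LP-cluster CPU is accounted in slot 4.
 */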
static inline unsigned int cpu_number(unsigned int n)
{
        return is_lp_cluster() ? 4 : n;
}

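/* Statistics hooks, assumed to be called from the common Tegra cpuidle driver. */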
void tegra3_cpu_idle_stats_lp2_ready(unsigned int cpu)
{
        idle_stats.cpu_ready_count[cpu_number(cpu)]++;
}

void tegra3_cpu_idle_stats_lp2_time(unsigned int cpu, s64 us)
{
        idle_stats.cpu_wants_lp2_time[cpu_number(cpu)] += us;
}

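/*
 * Gate LP2 entry: all CPUs must have booted and the lp2_0_in_idle /
 * lp2_n_in_idle module parameters must allow it.  CPU0 may additionally
 * proceed only when CPU1-3 are power gated, no DVFS rail update is in
 * flight, and the predicted idle time covers the target residency.
 */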
bool tegra3_lp2_is_allowed(struct cpuidle_device *dev,
        struct cpuidle_state *state)
{
        s64 request;

        if (!tegra_all_cpus_booted)
                return false;

        if ((!lp2_0_in_idle && !dev->cpu) || (!lp2_n_in_idle && dev->cpu))
                return false;

        /* On A01, LP2 on slave CPUs causes random CPU hangs.
         * Refer to Bug 804085.
         */
        if ((tegra_revision == TEGRA_REVISION_A01) &&
                num_online_cpus() > 1)
                return false;

        /* FIXME: All CPUs entering LP2 is not working.
         * Don't let CPU0 enter LP2 when any secondary CPU is online.
         */
        if ((dev->cpu == 0) && (num_online_cpus() > 1))
                return false;

        if (dev->cpu == 0) {
                u32 reg = readl(CLK_RST_CONTROLLER_CPU_CMPLX_STATUS);
                if ((reg & 0xE) != 0xE)
                        return false;

                if (tegra_dvfs_rail_updating(cpu_clk_for_dvfs))
                        return false;
        }

        request = ktime_to_us(tick_nohz_get_sleep_length());
        if (state->exit_latency != lp2_exit_latencies[cpu_number(dev->cpu)]) {
                /* possible on the 1st entry after cluster switch */
                state->exit_latency = lp2_exit_latencies[cpu_number(dev->cpu)];
                tegra_lp2_update_target_residency(state);
        }
        if (request < state->target_residency) {
                /* Not enough time left to enter LP2 */
                return false;
        }

        return true;
}

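/* Fall back to LP3 (plain WFI) and tell the governor state 0 was used. */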
static inline void tegra3_lp3_fall_back(struct cpuidle_device *dev)
{
        tegra_cpu_wfi();
        /* fall back here from LP2 path - tell cpuidle governor */
        dev->last_state = &dev->states[0];
}

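/*
 * LP2 entry for CPU0.  If other G-cluster CPUs are online, all
 * interrupts are first routed to CPU0 and the requested sleep is
 * clamped to the earliest wake deadline published by the secondaries;
 * only then is the whole CPU complex powered down via
 * tegra_idle_lp2_last().
 */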
static void tegra3_idle_enter_lp2_cpu_0(struct cpuidle_device *dev,
                           struct cpuidle_state *state, s64 request)
{
        ktime_t entry_time;
        ktime_t exit_time;
        bool sleep_completed = false;
        int bin;

        /* LP2 entry time */
        entry_time = ktime_get();

        if (request < state->target_residency) {
                /* Not enough time left to enter LP2 */
                tegra3_lp3_fall_back(dev);
                return;
        }

#ifdef CONFIG_SMP
        if (!is_lp_cluster() && (num_online_cpus() > 1)) {
                s64 wake_time;
                unsigned int i;

                /* Disable the distributor -- this is the only way to
                   prevent the other CPUs from responding to interrupts
                   and potentially fiddling with the distributor
                   registers while we're fiddling with them. */
                tegra_gic_dist_disable();

                /* Did an interrupt come in for another CPU before we
                   could disable the distributor? */
                if (!tegra3_lp2_is_allowed(dev, state)) {
                        /* Yes, re-enable the distributor and LP3. */
                        tegra_gic_dist_enable();
                        tegra3_lp3_fall_back(dev);
                        return;
                }

                /* Save and disable the affinity setting for the other
                   CPUs and route all interrupts to CPU0. */
                tegra_gic_disable_affinity();

                /* Re-enable the distributor. */
                tegra_gic_dist_enable();

                /* LP2 initial targeted wake time */
                wake_time = ktime_to_us(entry_time) + request;

                /* CPU0 must wake up before any of the other CPUs. */
                smp_rmb();
                for (i = 1; i < CONFIG_NR_CPUS; i++)
                        wake_time = min_t(s64, wake_time,
                                tegra_cpu_wake_by_time[i]);

                /* LP2 actual targeted wake time */
                request = wake_time - ktime_to_us(entry_time);
                BUG_ON(wake_time < 0LL);
        }
#endif

        if (request > state->target_residency) {
                s64 sleep_time = request -
                        lp2_exit_latencies[cpu_number(dev->cpu)];

                bin = time_to_bin((u32)request / 1000);
                idle_stats.tear_down_count[cpu_number(dev->cpu)]++;
                idle_stats.lp2_count++;
                idle_stats.lp2_count_bin[bin]++;

                trace_power_start(POWER_CSTATE, 2, dev->cpu);
                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
                if (!is_lp_cluster())
                        tegra_dvfs_rail_off(tegra_cpu_rail, entry_time);

                if (tegra_idle_lp2_last(sleep_time, 0) == 0) {
                        sleep_completed = true;
                } else {
                        int irq = tegra_gic_pending_interrupt();
                        idle_stats.lp2_int_count[irq]++;
                }

                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
                exit_time = ktime_get();
                if (!is_lp_cluster())
                        tegra_dvfs_rail_on(tegra_cpu_rail, exit_time);
                idle_stats.in_lp2_time[cpu_number(dev->cpu)] +=
                        ktime_to_us(ktime_sub(exit_time, entry_time));
        } else {
                exit_time = ktime_get();
        }

#ifdef CONFIG_SMP
        if (!is_lp_cluster() && (num_online_cpus() > 1)) {

                /* Disable the distributor. */
                tegra_gic_dist_disable();

                /* Restore the other CPUs' interrupt affinity. */
                tegra_gic_restore_affinity();

                /* Re-enable the distributor. */
                tegra_gic_dist_enable();
        }
#endif

        if (sleep_completed) {
                /*
                 * Stayed in LP2 for the full time until the next tick,
                 * adjust the exit latency based on measurement
                 */
                int offset = ktime_to_us(ktime_sub(exit_time, entry_time))
                        - request;
                int latency = lp2_exit_latencies[cpu_number(dev->cpu)] +
                        offset / 16;
                latency = clamp(latency, 0, 10000);
                lp2_exit_latencies[cpu_number(dev->cpu)] = latency;
                state->exit_latency = latency;          /* for idle governor */
                smp_wmb();

                idle_stats.lp2_completed_count++;
                idle_stats.lp2_completed_count_bin[bin]++;

                pr_debug("%lld %lld %d %d\n", request,
                        ktime_to_us(ktime_sub(exit_time, entry_time)),
                        offset, bin);
        }
}

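/*
 * LP2 entry for a secondary CPU: the core is power gated individually.
 * With CONFIG_TEGRA_LP2_ARM_TWD the local timer keeps running and its
 * count is biased by the exit latency so the CPU wakes early enough;
 * otherwise the TWD state is saved and the external LP2 wake timer plus
 * broadcast clockevents arm the wakeup.
 */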
static void tegra3_idle_enter_lp2_cpu_n(struct cpuidle_device *dev,
                           struct cpuidle_state *state, s64 request)
{
#ifdef CONFIG_SMP
        s64 sleep_time;
        ktime_t entry_time;
        struct tegra_twd_context twd_context;
        bool sleep_completed = false;

        if (!tegra_twd_get_state(&twd_context)) {
                unsigned long twd_rate = clk_get_rate(twd_clk);

                if ((twd_context.twd_ctrl & TWD_TIMER_CONTROL_ENABLE) &&
                    (twd_context.twd_ctrl & TWD_TIMER_CONTROL_IT_ENABLE)) {
                        request = div_u64((u64)twd_context.twd_cnt * 1000000,
                                          twd_rate);
#ifdef CONFIG_TEGRA_LP2_ARM_TWD
                        if (request >= state->target_residency) {
                                twd_context.twd_cnt -= state->exit_latency *
                                        (twd_rate / 1000000);
                                writel(twd_context.twd_cnt,
                                        twd_base + TWD_TIMER_COUNTER);
                        }
#endif
                }
        }

        if (!tegra_is_lp2_timer_ready(dev->cpu) ||
            (request < state->target_residency)) {
                /*
                 * Not enough time left to enter LP2, or wake timer not ready
                 */
                tegra3_lp3_fall_back(dev);
                return;
        }

#ifndef CONFIG_TEGRA_LP2_ARM_TWD
        sleep_time = request - state->exit_latency;
        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
        tegra_twd_suspend(&twd_context);
        tegra_lp2_set_trigger(sleep_time);
#endif
        idle_stats.tear_down_count[cpu_number(dev->cpu)]++;

        trace_power_start(POWER_CSTATE, 2, dev->cpu);

        entry_time = ktime_get();

        /* Save time this CPU must be awakened by. */
        tegra_cpu_wake_by_time[dev->cpu] = ktime_to_us(entry_time) + request;
        smp_wmb();

        tegra3_sleep_cpu_secondary(PLAT_PHYS_OFFSET - PAGE_OFFSET);

        tegra_cpu_wake_by_time[dev->cpu] = LLONG_MAX;

#ifdef CONFIG_TEGRA_LP2_ARM_TWD
        if (!tegra_twd_get_state(&twd_context))
                sleep_completed = (twd_context.twd_cnt == 0);
#else
        sleep_completed = !tegra_lp2_timer_remain();
        tegra_lp2_set_trigger(0);
        tegra_twd_resume(&twd_context);
        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
#endif
        sleep_time = ktime_to_us(ktime_sub(ktime_get(), entry_time));
        idle_stats.in_lp2_time[cpu_number(dev->cpu)] += sleep_time;
        if (sleep_completed) {
                /*
                 * Stayed in LP2 for the full time until timer expires,
                 * adjust the exit latency based on measurement
                 */
                int offset = sleep_time - request;
                int latency = lp2_exit_latencies[cpu_number(dev->cpu)] +
                        offset / 16;
                latency = clamp(latency, 0, 10000);
                lp2_exit_latencies[cpu_number(dev->cpu)] = latency;
                state->exit_latency = latency;          /* for idle governor */
                smp_wmb();
        }
#endif
}

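/*
 * LP2 entry point from the cpuidle driver: the last CPU to idle (when
 * it is CPU0) takes the cluster power-down path, any other CPU power
 * gates itself individually.
 */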
void tegra3_idle_lp2(struct cpuidle_device *dev,
                           struct cpuidle_state *state)
{
        s64 request = ktime_to_us(tick_nohz_get_sleep_length());
        bool last_cpu = tegra_set_cpu_in_lp2(dev->cpu);

        cpu_pm_enter();

        if (last_cpu && (dev->cpu == 0))
                tegra3_idle_enter_lp2_cpu_0(dev, state, request);
        else
                tegra3_idle_enter_lp2_cpu_n(dev, state, request);

        cpu_pm_exit();
        tegra_clear_cpu_in_lp2(dev->cpu);
}

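/*
 * One-time SoC init.  The "cpudile" spelling is kept as-is since it
 * must match the external declaration (presumably in cpuidle.h).
 */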
int tegra3_cpudile_init_soc(void)
{
        int i;

        cpu_clk_for_dvfs = tegra_get_clock_by_name("cpu_g");
        twd_clk = tegra_get_clock_by_name("twd");

        for (i = 0; i < ARRAY_SIZE(lp2_exit_latencies); i++)
                lp2_exit_latencies[i] = tegra_lp2_exit_latency;

        return 0;
}

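/* debugfs report of the LP2 statistics collected above. */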
#ifdef CONFIG_DEBUG_FS
int tegra3_lp2_debug_show(struct seq_file *s, void *data)
{
        int bin;
        int i;
        seq_printf(s, "                                    cpu0     cpu1     cpu2     cpu3     cpulp\n");
        seq_printf(s, "-----------------------------------------------------------------------------\n");
        seq_printf(s, "cpu ready:                      %8u %8u %8u %8u %8u\n",
                idle_stats.cpu_ready_count[0],
                idle_stats.cpu_ready_count[1],
                idle_stats.cpu_ready_count[2],
                idle_stats.cpu_ready_count[3],
                idle_stats.cpu_ready_count[4]);
        seq_printf(s, "tear down:                      %8u %8u %8u %8u %8u\n",
                idle_stats.tear_down_count[0],
                idle_stats.tear_down_count[1],
                idle_stats.tear_down_count[2],
                idle_stats.tear_down_count[3],
                idle_stats.tear_down_count[4]);
        seq_printf(s, "lp2:            %8u\n", idle_stats.lp2_count);
        seq_printf(s, "lp2 completed:  %8u %7u%%\n",
                idle_stats.lp2_completed_count,
                idle_stats.lp2_completed_count * 100 /
                        (idle_stats.lp2_count ?: 1));

        seq_printf(s, "\n");
        seq_printf(s, "cpu ready time:                 %8llu %8llu %8llu %8llu %8llu ms\n",
                div64_u64(idle_stats.cpu_wants_lp2_time[0], 1000),
                div64_u64(idle_stats.cpu_wants_lp2_time[1], 1000),
                div64_u64(idle_stats.cpu_wants_lp2_time[2], 1000),
                div64_u64(idle_stats.cpu_wants_lp2_time[3], 1000),
                div64_u64(idle_stats.cpu_wants_lp2_time[4], 1000));

        seq_printf(s, "lp2 time:                       %8llu %8llu %8llu %8llu %8llu ms\n",
                div64_u64(idle_stats.in_lp2_time[0], 1000),
                div64_u64(idle_stats.in_lp2_time[1], 1000),
                div64_u64(idle_stats.in_lp2_time[2], 1000),
                div64_u64(idle_stats.in_lp2_time[3], 1000),
                div64_u64(idle_stats.in_lp2_time[4], 1000));

        seq_printf(s, "lp2 %%:                         %7d%% %7d%% %7d%% %7d%% %7d%%\n",
                (int)(idle_stats.cpu_wants_lp2_time[0] ?
                        div64_u64(idle_stats.in_lp2_time[0] * 100,
                        idle_stats.cpu_wants_lp2_time[0]) : 0),
                (int)(idle_stats.cpu_wants_lp2_time[1] ?
                        div64_u64(idle_stats.in_lp2_time[1] * 100,
                        idle_stats.cpu_wants_lp2_time[1]) : 0),
                (int)(idle_stats.cpu_wants_lp2_time[2] ?
                        div64_u64(idle_stats.in_lp2_time[2] * 100,
                        idle_stats.cpu_wants_lp2_time[2]) : 0),
                (int)(idle_stats.cpu_wants_lp2_time[3] ?
                        div64_u64(idle_stats.in_lp2_time[3] * 100,
                        idle_stats.cpu_wants_lp2_time[3]) : 0),
                (int)(idle_stats.cpu_wants_lp2_time[4] ?
                        div64_u64(idle_stats.in_lp2_time[4] * 100,
                        idle_stats.cpu_wants_lp2_time[4]) : 0));
        seq_printf(s, "\n");

        seq_printf(s, "%19s %8s %8s %8s\n", "", "lp2", "comp", "%");
        seq_printf(s, "-------------------------------------------------\n");
        for (bin = 0; bin < 32; bin++) {
                if (idle_stats.lp2_count_bin[bin] == 0)
                        continue;
                seq_printf(s, "%6u - %6u ms: %8u %8u %7u%%\n",
                        1 << (bin - 1), 1 << bin,
                        idle_stats.lp2_count_bin[bin],
                        idle_stats.lp2_completed_count_bin[bin],
                        idle_stats.lp2_completed_count_bin[bin] * 100 /
                                idle_stats.lp2_count_bin[bin]);
        }

        seq_printf(s, "\n");
        seq_printf(s, "%3s %20s %6s %10s\n",
                "int", "name", "count", "last count");
        seq_printf(s, "--------------------------------------------\n");
        for (i = 0; i < NR_IRQS; i++) {
                if (idle_stats.lp2_int_count[i] == 0)
                        continue;
                seq_printf(s, "%3d %20s %6d %10d\n",
                        i, irq_to_desc(i)->action ?
                                irq_to_desc(i)->action->name ?: "???" : "???",
                        idle_stats.lp2_int_count[i],
                        idle_stats.lp2_int_count[i] -
                                idle_stats.last_lp2_int_count[i]);
                idle_stats.last_lp2_int_count[i] = idle_stats.lp2_int_count[i];
        }
        return 0;
}
#endif