gpu: nvgpu: Disable GM20b clock slowdown for monitor
[linux-3.10.git] / drivers / gpu / nvgpu / gm20b / clk_gm20b.c
1 /*
2  * GM20B Clocks
3  *
4  * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/clk.h>
20 #include <linux/delay.h>        /* for mdelay */
21 #include <linux/module.h>
22 #include <linux/debugfs.h>
23 #include <linux/clk/tegra.h>
24
25 #include "gk20a/gk20a.h"
26 #include "hw_trim_gm20b.h"
27 #include "hw_timer_gm20b.h"
28 #include "hw_therm_gm20b.h"
29 #include "clk_gm20b.h"
30
/* Clock-domain debug trace: routes through gk20a_dbg() with the
 * gpu_dbg_clk mask so these messages can be enabled selectively. */
#define gk20a_dbg_clk(fmt, arg...) \
	gk20a_dbg(gpu_dbg_clk, fmt, ##arg)
33
/* from vbios PLL info table */
/* GPCPLL operating limits, all rates in kHz.  Initializer order must
 * match struct pll_parms: min/max output freq, min/max VCO, min/max
 * update rate (input/M), then the legal ranges of the M, N and PL
 * divider fields. */
static struct pll_parms gpc_pll_params = {
	128000,  2600000,	/* freq */
	1300000, 2600000,	/* vco */
	12000,   38400,		/* u */
	1, 255,			/* M */
	8, 255,			/* N */
	1, 31,			/* PL */
};
43
44 #ifdef CONFIG_DEBUG_FS
45 static int clk_gm20b_debugfs_init(struct gk20a *g);
46 #endif
47
48 /* 1:1 match between post divider settings and divisor value */
49 static inline u32 pl_to_div(u32 pl)
50 {
51         return pl;
52 }
53
54 static inline u32 div_to_pl(u32 div)
55 {
56         return div;
57 }
58
/* FIXME: remove after on-silicon testing */
/* Non-zero: change the post divider glitchlessly on the VCO path
 * (capped at >= 1:2 around the change).  Zero: drop to bypass with a
 * temporary 1:2 output divider instead. */
#define PLDIV_GLITCHLESS 1
61
/* Calculate and update M/N/PL as well as pll->freq
    ref_clk_f = clk_in_f;
    u_f = ref_clk_f / M;
    vco_f = u_f * N = ref_clk_f * N / M;
    PLL output = gpc2clk = target clock frequency = vco_f / pl_to_pdiv(PL);
    gpcclk = gpc2clk / 2; */
/*
 * Exhaustively search the legal (PL, M, N) space for the setting whose
 * output is closest to *target_freq (kHz).  On return pll->M/N/PL and
 * pll->freq hold the selection and *target_freq is overwritten with the
 * actually achievable frequency.  With @best_fit set, only an exact hit
 * avoids the "no best match" debug message; without it the search stops
 * early once the relative error drops below ~0.45%.  Always returns 0;
 * an empty search space trips BUG_ON instead.
 */
static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll,
	struct pll_parms *pll_params, u32 *target_freq, bool best_fit)
{
	u32 min_vco_f, max_vco_f;
	u32 best_M, best_N;
	u32 low_PL, high_PL, best_PL;
	u32 m, n, n2;
	u32 target_vco_f, vco_f;
	u32 ref_clk_f, target_clk_f, u_f;
	u32 delta, lwv, best_delta = ~0;
	u32 pl;

	BUG_ON(target_freq == NULL);

	gk20a_dbg_fn("request target freq %d MHz", *target_freq);

	ref_clk_f = pll->clk_in;
	target_clk_f = *target_freq;
	max_vco_f = pll_params->max_vco;
	min_vco_f = pll_params->min_vco;
	best_M = pll_params->max_M;
	best_N = pll_params->min_N;
	best_PL = pll_params->min_PL;

	/* Aim the VCO 2% above target so rounding never lands below it. */
	target_vco_f = target_clk_f + target_clk_f / 50;
	if (max_vco_f < target_vco_f)
		max_vco_f = target_vco_f;

	/* Set PL search boundaries. */
	high_PL = div_to_pl((max_vco_f + target_vco_f - 1) / target_vco_f);
	high_PL = min(high_PL, pll_params->max_PL);
	high_PL = max(high_PL, pll_params->min_PL);

	low_PL = div_to_pl(min_vco_f / target_vco_f);
	low_PL = min(low_PL, pll_params->max_PL);
	low_PL = max(low_PL, pll_params->min_PL);

	gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)",
			low_PL, pl_to_div(low_PL), high_PL, pl_to_div(high_PL));

	for (pl = low_PL; pl <= high_PL; pl++) {
		/* VCO frequency needed to hit target through this PL. */
		target_vco_f = target_clk_f * pl_to_div(pl);

		for (m = pll_params->min_M; m <= pll_params->max_M; m++) {
			u_f = ref_clk_f / m;

			/* u_f only shrinks as m grows: stop, don't skip. */
			if (u_f < pll_params->min_u)
				break;
			if (u_f > pll_params->max_u)
				continue;

			/* Bracket N between floor and ceiling candidates. */
			n = (target_vco_f * m) / ref_clk_f;
			n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f;

			if (n > pll_params->max_N)
				break;

			for (; n <= n2; n++) {
				if (n < pll_params->min_N)
					continue;
				if (n > pll_params->max_N)
					break;

				vco_f = ref_clk_f * n / m;

				if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
					/* Rounded PLL output for this PL. */
					lwv = (vco_f + (pl_to_div(pl) / 2))
						/ pl_to_div(pl);
					delta = abs(lwv - target_clk_f);

					if (delta < best_delta) {
						best_delta = delta;
						best_M = m;
						best_N = n;
						best_PL = pl;

						if (best_delta == 0 ||
						    /* 0.45% for non best fit */
						    (!best_fit && (vco_f / best_delta > 218))) {
							goto found_match;
						}

						gk20a_dbg_info("delta %d @ M %d, N %d, PL %d",
							delta, m, n, pl);
					}
				}
			}
		}
	}

found_match:
	/* best_delta still ~0 means no legal configuration was found. */
	BUG_ON(best_delta == ~0);

	if (best_fit && best_delta != 0)
		gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll",
			target_clk_f);

	pll->M = best_M;
	pll->N = best_N;
	pll->PL = best_PL;

	/* save current frequency */
	pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div(pll->PL));

	*target_freq = pll->freq;

	gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)",
		*target_freq, pll->M, pll->N, pll->PL, pl_to_div(pll->PL));

	gk20a_dbg_fn("done");

	return 0;
}
181
/*
 * Program the dynamic-ramp slew coefficients (step A in CFG2, step B in
 * CFG3) for the PLL update rate @clk_u = clk_in / M (kHz).  Only the
 * listed reference rates are supported; any other rate BUG()s (step_a
 * and step_b would otherwise be used uninitialized).
 */
static void clk_setup_slide(struct gk20a *g, u32 clk_u)
{
	u32 data, step_a, step_b;

	switch (clk_u) {
	case 12000:
	case 12800:
	case 13000:			/* only on FPGA */
		step_a = 0x2B;
		step_b = 0x0B;
		break;
	case 19200:
		step_a = 0x12;
		step_b = 0x08;
		break;
	case 38400:
		step_a = 0x04;
		step_b = 0x05;
		break;
	default:
		gk20a_err(dev_from_gk20a(g), "Unexpected reference rate %u kHz",
			  clk_u);
		BUG();
	}

	/* setup */
	data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
	data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
			trim_sys_gpcpll_cfg2_pll_stepa_f(step_a));
	gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
	data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
	data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
			trim_sys_gpcpll_cfg3_pll_stepb_f(step_b));
	gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
}
217
/*
 * Slide NDIV to @n via the hardware dynamic ramp, keeping the PLL in
 * use (no bypass).  Sequence: program slew steps for the current update
 * rate, enter PLL slowdown mode, latch the new NDIV, trigger the ramp,
 * poll the broadcast debug register for completion, then always leave
 * slowdown mode.  Returns 0 on success (or if NDIV is unchanged),
 * -ETIMEDOUT if the ramp never signals done within ~500 us.
 */
static int clk_slide_gpc_pll(struct gk20a *g, u32 n)
{
	u32 data, coeff;
	u32 nold, m;
	int ramp_timeout = 500;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);

	/* do nothing if NDIV is same */
	if (n == nold)
		return 0;

	/* dynamic ramp setup based on update rate */
	m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
	clk_setup_slide(g, g->clk.gpc_pll.clk_in / m);

	/* pll slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* new ndiv ready for ramp */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
			trim_sys_gpcpll_coeff_ndiv_f(n));
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* dynamic ramp to new ndiv */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* poll for ramp completion, 1 us per iteration */
	do {
		udelay(1);
		ramp_timeout--;
		data = gk20a_readl(
			g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
		if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
			break;
	} while (ramp_timeout > 0);

	/* exit slowdown mode (even on timeout, to leave HW consistent) */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
	/* read back to make sure the write posted */
	gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());

	if (ramp_timeout <= 0) {
		gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
		return -ETIMEDOUT;
	}
	return 0;
}
284
/*
 * Program M/N/PL and lock the PLL while gpc2clk is sourced from the
 * bypass path.  Handles both first power-up (exit IDDQ) and
 * reprogramming a running PLL (sync_mode off, disable, rewrite
 * coefficients, re-enable).  On success the PLL is locked, sync_mode is
 * re-enabled for a glitchless switch, and gpc2clk is moved back onto
 * the VCO.  A lock timeout is treated as fatal (BUG()).
 */
static int clk_lock_gpc_pll_under_bypass(struct gk20a *g, u32 m, u32 n, u32 pl)
{
	u32 data, cfg, coeff, timeout;

	/* put PLL in bypass before programming it */
	data = gk20a_readl(g, trim_sys_sel_vco_r());
	data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
		trim_sys_sel_vco_gpc2clk_out_bypass_f());
	gk20a_writel(g, trim_sys_sel_vco_r(), data);

	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (trim_sys_gpcpll_cfg_iddq_v(cfg)) {
		/* get out from IDDQ (1st power up) */
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(),
				trim_sys_gpcpll_cfg_iddq_power_on_v());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
		gk20a_readl(g, trim_sys_gpcpll_cfg_r());
		udelay(5);
	} else {
		/* clear SYNC_MODE before disabling PLL */
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
				trim_sys_gpcpll_cfg_sync_mode_disable_f());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
		gk20a_readl(g, trim_sys_gpcpll_cfg_r());

		/* disable running PLL before changing coefficients */
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
				trim_sys_gpcpll_cfg_enable_no_f());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
		gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	}

	/* change coefficients */
	coeff = trim_sys_gpcpll_coeff_mdiv_f(m) |
		trim_sys_gpcpll_coeff_ndiv_f(n) |
		trim_sys_gpcpll_coeff_pldiv_f(pl);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* enable PLL after changing coefficients */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
			trim_sys_gpcpll_cfg_enable_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);

	/* lock pll: power up the lock detector if it is off */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()){
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(),
			trim_sys_gpcpll_cfg_enb_lckdet_power_on_f());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	}

	/* wait pll lock: poll every 2 us, ~pll_delay us total budget */
	timeout = g->clk.pll_delay / 2 + 1;
	do {
		cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
		if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f())
			goto pll_locked;
		udelay(2);
	} while (--timeout > 0);

	/* PLL is messed up. What can we do here? */
	BUG();
	return -EBUSY;

pll_locked:
	gk20a_dbg_clk("locked config_pll under bypass r=0x%x v=0x%x",
		trim_sys_gpcpll_cfg_r(), cfg);

	/* set SYNC_MODE for glitchless switch out of bypass */
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
			trim_sys_gpcpll_cfg_sync_mode_enable_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	gk20a_readl(g, trim_sys_gpcpll_cfg_r());

	/* put PLL back on vco */
	data = gk20a_readl(g, trim_sys_sel_vco_r());
	data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
		trim_sys_sel_vco_gpc2clk_out_vco_f());
	gk20a_writel(g, trim_sys_sel_vco_r(), data);

	return 0;
}
368
/*
 * Program the GPCPLL to clk->gpc_pll (target M/N/PL).  If only NDIV
 * changes and @allow_slide is set with the PLL running, a dynamic ramp
 * does it without bypass.  Otherwise the VCO is first slid down to its
 * minimum (when possible), the PLL is relocked under bypass, and NDIV
 * is slid up to the target at the end.  With PLDIV_GLITCHLESS the post
 * divider is changed on the fly, capped at >= 1:2 to bound the
 * frequency jump; otherwise the output divider is temporarily set to
 * 1:2 around the bypass switch.  Returns 0 or a slide error.
 */
static int clk_program_gpc_pll(struct gk20a *g, struct clk_gk20a *clk,
			int allow_slide)
{
#if PLDIV_GLITCHLESS
	bool skip_bypass;
#else
	u32 data;
#endif
	u32 cfg, coeff;
	u32 m, n, pl, nlo;
	bool can_slide;

	gk20a_dbg_fn("");

	if (!tegra_platform_is_silicon())
		return 0;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
	n = trim_sys_gpcpll_coeff_ndiv_v(coeff);
	pl = trim_sys_gpcpll_coeff_pldiv_v(coeff);

	/* do NDIV slide if there is no change in M and PL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	can_slide = allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg);

	if (can_slide && (clk->gpc_pll.M == m) && (clk->gpc_pll.PL == pl))
		return clk_slide_gpc_pll(g, clk->gpc_pll.N);

	/* slide down to NDIV_LO */
	nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco, clk->gpc_pll.clk_in);
	if (can_slide) {
		int ret = clk_slide_gpc_pll(g, nlo);
		if (ret)
			return ret;
	}

#if PLDIV_GLITCHLESS
	/*
	 * Limit either FO-to-FO (path A below) or FO-to-bypass (path B below)
	 * jump to min_vco/2 by setting post divider >= 1:2.
	 */
	skip_bypass = can_slide && (clk->gpc_pll.M == m);
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	if ((skip_bypass && (clk->gpc_pll.PL < 2)) || (pl < 2)) {
		if (pl != 2) {
			coeff = set_field(coeff,
				trim_sys_gpcpll_coeff_pldiv_m(),
				trim_sys_gpcpll_coeff_pldiv_f(2));
			gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
			coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
			udelay(2);
		}
	}

	if (skip_bypass)
		goto set_pldiv; /* path A: no need to bypass */

	/* path B: bypass if either M changes or PLL is disabled */
#else
	/* split FO-to-bypass jump in halfs by setting out divider 1:2 */
	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
		trim_sys_gpc2clk_out_vcodiv_f(2));
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
	gk20a_readl(g, trim_sys_gpc2clk_out_r());
	udelay(2);
#endif
	/*
	 * Program and lock pll under bypass. On exit PLL is out of bypass,
	 * enabled, and locked. VCO is at vco_min if sliding is allowed.
	 * Otherwise it is at VCO target (and therefore last slide call below
	 * is effectively NOP). PL is preserved (not set to target) if post
	 * divider is glitchless. Otherwise it is at PL target.
	 */
	m = clk->gpc_pll.M;
	nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco, clk->gpc_pll.clk_in);
	n = allow_slide ? nlo : clk->gpc_pll.N;
#if PLDIV_GLITCHLESS
	pl = (clk->gpc_pll.PL < 2) ? 2 : clk->gpc_pll.PL;
#else
	pl = clk->gpc_pll.PL;
#endif
	clk_lock_gpc_pll_under_bypass(g, m, n, pl);
	clk->gpc_pll.enabled = true;

#if PLDIV_GLITCHLESS
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	udelay(2);

set_pldiv:
	/* coeff must be current from either path A or B */
	if (trim_sys_gpcpll_coeff_pldiv_v(coeff) != clk->gpc_pll.PL) {
		coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(),
			trim_sys_gpcpll_coeff_pldiv_f(clk->gpc_pll.PL));
		gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
	}
#else
	/* restore out divider 1:1 */
	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
		trim_sys_gpc2clk_out_vcodiv_by1_f());
	udelay(2);
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
#endif
	/* slide up to target NDIV */
	return clk_slide_gpc_pll(g, clk->gpc_pll.N);
}
478
/*
 * Power the GPCPLL down: optionally slide the VCO to its minimum first
 * (softens the frequency jump), switch gpc2clk to bypass, clear
 * sync_mode, then disable the PLL.  Always returns 0.
 */
static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
{
	u32 cfg, coeff, m, nlo;
	struct clk_gk20a *clk = &g->clk;

	/* slide to VCO min */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
		coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
		m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
		nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco,
				   clk->gpc_pll.clk_in);
		clk_slide_gpc_pll(g, nlo);
	}

	/* put PLL in bypass before disabling it */
	cfg = gk20a_readl(g, trim_sys_sel_vco_r());
	cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
			trim_sys_sel_vco_gpc2clk_out_bypass_f());
	gk20a_writel(g, trim_sys_sel_vco_r(), cfg);

	/* clear SYNC_MODE before disabling PLL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
			trim_sys_gpcpll_cfg_sync_mode_disable_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);

	/* disable PLL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
			trim_sys_gpcpll_cfg_enable_no_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	/* read back to make sure the disable posted */
	gk20a_readl(g, trim_sys_gpcpll_cfg_r());

	clk->gpc_pll.enabled = false;
	return 0;
}
516
/* No reset/enable HW work is needed on GM20B; stub kept so the common
 * init sequence can call it unconditionally.  Always returns 0. */
static int gm20b_init_clk_reset_enable_hw(struct gk20a *g)
{
	gk20a_dbg_fn("");
	return 0;
}
522
/*
 * Lazily look up and cache the Tegra clock-framework handle for the GPU
 * clock ("tegra_gk20a"/"gpu").  Returns the cached handle, or NULL if
 * the lookup fails.
 */
struct clk *gm20b_clk_get(struct gk20a *g)
{
	if (!g->clk.tegra_clk) {
		struct clk *clk;

		clk = clk_get_sys("tegra_gk20a", "gpu");
		if (IS_ERR(clk)) {
			gk20a_err(dev_from_gk20a(g),
				"fail to get tegra gpu clk tegra_gk20a/gpu");
			return NULL;
		}
		g->clk.tegra_clk = clk;
	}

	return g->clk.tegra_clk;
}
539
/*
 * One-time SW init of clock state: resolve the GPCPLL reference rate
 * (grandparent of the GPU clock in the clock tree) and, on the very
 * first boot only, pick a safe initial configuration at 1/3 of VCO min.
 * The function-local static 'initialized' preserves the chosen M/N/PL
 * across rail-gating cycles, while sw_ready short-circuits repeat calls
 * in the same power cycle.  Returns 0 or -EINVAL if the clock handles
 * cannot be resolved.
 */
static int gm20b_init_clk_setup_sw(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	static int initialized;
	struct clk *ref;
	unsigned long ref_rate;

	gk20a_dbg_fn("");

	if (clk->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	if (!gk20a_clk_get(g))
		return -EINVAL;

	/* GPU clk -> gbus -> GPCPLL reference */
	ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
	if (IS_ERR(ref)) {
		gk20a_err(dev_from_gk20a(g),
			"failed to get GPCPLL reference clock");
		return -EINVAL;
	}
	ref_rate = clk_get_rate(ref);

	clk->pll_delay = 300; /* usec */

	clk->gpc_pll.id = GK20A_GPC_PLL;
	clk->gpc_pll.clk_in = ref_rate / KHZ;	/* Hz -> kHz */

	/* Initial frequency: 1/3 VCO min (low enough to be safe at Vmin) */
	if (!initialized) {
		initialized = 1;
		clk->gpc_pll.M = 1;
		clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
					clk->gpc_pll.clk_in);
		clk->gpc_pll.PL = 3;
		clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
		clk->gpc_pll.freq /= pl_to_div(clk->gpc_pll.PL);
	}

	mutex_init(&clk->clk_mutex);

	clk->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;
}
588
/*
 * HW-side clock init: put the gpc2clk output divider in the required
 * Div4 mode with 1:1 bypass/VCO ratios, and clear the global PLL bypass
 * control.  The PLL itself remains bypassed afterwards because SEL_VCO
 * is still at its cleared default.  Always returns 0.
 */
static int gm20b_init_clk_setup_hw(struct gk20a *g)
{
	u32 data;

	gk20a_dbg_fn("");

	/* LDIV: Div4 mode (required); both  bypass and vco ratios 1:1 */
	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data,
			trim_sys_gpc2clk_out_sdiv14_m() |
			trim_sys_gpc2clk_out_vcodiv_m() |
			trim_sys_gpc2clk_out_bypdiv_m(),
			trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
			trim_sys_gpc2clk_out_vcodiv_by1_f() |
			trim_sys_gpc2clk_out_bypdiv_f(0));
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);

	/*
	 * Clear global bypass control; PLL is still under bypass, since SEL_VCO
	 * is cleared by default.
	 */
	data = gk20a_readl(g, trim_sys_bypassctrl_r());
	data = set_field(data, trim_sys_bypassctrl_gpcpll_m(),
			 trim_sys_bypassctrl_gpcpll_vco_f());
	gk20a_writel(g, trim_sys_bypassctrl_r(), data);

	return 0;
}
617
618 static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
619 {
620         struct clk_gk20a *clk = &g->clk;
621
622         if (freq > gpc_pll_params.max_freq)
623                 freq = gpc_pll_params.max_freq;
624         else if (freq < gpc_pll_params.min_freq)
625                 freq = gpc_pll_params.min_freq;
626
627         if (freq != old_freq) {
628                 /* gpc_pll.freq is changed to new value here */
629                 if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
630                                    &freq, true)) {
631                         gk20a_err(dev_from_gk20a(g),
632                                    "failed to set pll target for %d", freq);
633                         return -EINVAL;
634                 }
635         }
636         return 0;
637 }
638
/*
 * Program the HW to the already-configured gpc_pll target (skipped when
 * the frequency is unchanged and the PLL is already enabled, or when HW
 * is powered off).  Tries the glitchless slide path first and falls
 * back to a full relock under bypass.  On failure only reports the
 * error -- no rollback, since DVFS may already have moved the voltage.
 * Returns 0 or the programming error.
 */
static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq)
{
	struct clk_gk20a *clk = &g->clk;
	int err = 0;

	gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq);

	if ((freq == old_freq) && clk->gpc_pll.enabled)
		return 0;

	/* change frequency only if power is on */
	if (g->clk.clk_hw_on) {
		err = clk_program_gpc_pll(g, clk, 1);
		if (err)
			err = clk_program_gpc_pll(g, clk, 0);
	}

	/* Just report error but not restore PLL since dvfs could already change
	    voltage even when it returns error. */
	if (err)
		gk20a_err(dev_from_gk20a(g),
			"failed to set pll to %d", freq);
	return err;
}
663
/*
 * Clock-export set_rate hook (@rate in Hz, in/out).  Converts to
 * gpc2clk kHz, retargets the PLL configuration, reprograms the HW only
 * if the PLL is currently enabled, and writes the actually achieved
 * rate back through @rate.  Returns -ENODATA when @rate is NULL.
 */
static int gm20b_clk_export_set_rate(void *data, unsigned long *rate)
{
	u32 old_freq;
	int ret = -ENODATA;
	struct gk20a *g = data;
	struct clk_gk20a *clk = &g->clk;

	if (rate) {
		mutex_lock(&clk->clk_mutex);
		old_freq = clk->gpc_pll.freq;
		ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq);
		if (!ret && clk->gpc_pll.enabled)
			ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq);
		if (!ret)
			*rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
		mutex_unlock(&clk->clk_mutex);
	}
	return ret;
}
683
684 static int gm20b_clk_export_enable(void *data)
685 {
686         int ret;
687         struct gk20a *g = data;
688         struct clk_gk20a *clk = &g->clk;
689
690         mutex_lock(&clk->clk_mutex);
691         ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
692         mutex_unlock(&clk->clk_mutex);
693         return ret;
694 }
695
696 static void gm20b_clk_export_disable(void *data)
697 {
698         struct gk20a *g = data;
699         struct clk_gk20a *clk = &g->clk;
700
701         mutex_lock(&clk->clk_mutex);
702         if (g->clk.clk_hw_on)
703                 clk_disable_gpcpll(g, 1);
704         mutex_unlock(&clk->clk_mutex);
705 }
706
/*
 * Clock-export init hook: report current PLL state (enabled flag and
 * rate in Hz) under the clock lock.  Either out-pointer may be NULL.
 */
static void gm20b_clk_export_init(void *data, unsigned long *rate, bool *state)
{
	struct gk20a *g = data;
	struct clk_gk20a *clk = &g->clk;

	mutex_lock(&clk->clk_mutex);
	if (state)
		*state = clk->gpc_pll.enabled;
	if (rate)
		*rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
	mutex_unlock(&clk->clk_mutex);
}
719
/* Hooks handed to the Tegra clock framework for the gbus clock; .data
 * is filled in at registration time with the gk20a instance. */
static struct tegra_clk_export_ops gm20b_clk_export_ops = {
	.init = gm20b_clk_export_init,
	.enable = gm20b_clk_export_enable,
	.disable = gm20b_clk_export_disable,
	.set_rate = gm20b_clk_export_set_rate,
};
726
727 static int gm20b_clk_register_export_ops(struct gk20a *g)
728 {
729         int ret;
730         struct clk *c;
731
732         if (gm20b_clk_export_ops.data)
733                 return 0;
734
735         gm20b_clk_export_ops.data = (void *)g;
736         c = g->clk.tegra_clk;
737         if (!c || !clk_get_parent(c))
738                 return -ENOSYS;
739
740         ret = tegra_clk_register_export_ops(clk_get_parent(c),
741                                             &gm20b_clk_export_ops);
742
743         return ret;
744 }
745
746 static int gm20b_init_clk_support(struct gk20a *g)
747 {
748         struct clk_gk20a *clk = &g->clk;
749         u32 err;
750
751         gk20a_dbg_fn("");
752
753         clk->g = g;
754
755         err = gm20b_init_clk_reset_enable_hw(g);
756         if (err)
757                 return err;
758
759         err = gm20b_init_clk_setup_sw(g);
760         if (err)
761                 return err;
762
763         mutex_lock(&clk->clk_mutex);
764         clk->clk_hw_on = true;
765
766         err = gm20b_init_clk_setup_hw(g);
767         mutex_unlock(&clk->clk_mutex);
768         if (err)
769                 return err;
770
771         err = gm20b_clk_register_export_ops(g);
772         if (err)
773                 return err;
774
775         /* FIXME: this effectively prevents host level clock gating */
776         err = clk_enable(g->clk.tegra_clk);
777         if (err)
778                 return err;
779
780         /* The prev call may not enable PLL if gbus is unbalanced - force it */
781         mutex_lock(&clk->clk_mutex);
782         err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
783         mutex_unlock(&clk->clk_mutex);
784         if (err)
785                 return err;
786
787 #ifdef CONFIG_DEBUG_FS
788         if (!clk->debugfs_set) {
789                 if (!clk_gm20b_debugfs_init(g))
790                         clk->debugfs_set = true;
791         }
792 #endif
793         return err;
794 }
795
/*
 * Suspend path: drop the CCF enable count, then force the GPCPLL off
 * and mark HW inaccessible.  The explicit disable is needed because
 * clk_disable() alone may leave the PLL running when the gbus enable
 * counts are unbalanced.  Returns the PLL-disable result (currently
 * always 0).
 */
static int gm20b_suspend_clk_support(struct gk20a *g)
{
	int ret;

	clk_disable(g->clk.tegra_clk);

	/* The prev call may not disable PLL if gbus is unbalanced - force it */
	mutex_lock(&g->clk.clk_mutex);
	ret = clk_disable_gpcpll(g, 1);
	g->clk.clk_hw_on = false;
	mutex_unlock(&g->clk.clk_mutex);
	return ret;
}
809
/* Wire the GM20B clock entry points into the per-chip ops table. */
void gm20b_init_clk_ops(struct gpu_ops *gops)
{
	gops->clk.init_clk_support = gm20b_init_clk_support;
	gops->clk.suspend_clk_support = gm20b_suspend_clk_support;
}
815
816 #ifdef CONFIG_DEBUG_FS
817
818 static int rate_get(void *data, u64 *val)
819 {
820         struct gk20a *g = (struct gk20a *)data;
821         *val = (u64)gk20a_clk_get_rate(g);
822         return 0;
823 }
824 static int rate_set(void *data, u64 val)
825 {
826         struct gk20a *g = (struct gk20a *)data;
827         return gk20a_clk_set_rate(g, (u32)val);
828 }
829 DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
830
/*
 * debugfs: dump GPCPLL bypass/sel_vco/cfg/coeff state with a decoded
 * PLL and GPU frequency.  Bails out with a message if the GPU is
 * railgated (registers inaccessible).
 */
static int pll_reg_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 reg, m, n, pl, f;

	mutex_lock(&g->clk.clk_mutex);
	if (!g->clk.clk_hw_on) {
		seq_printf(s, "gk20a powered down - no access to registers\n");
		mutex_unlock(&g->clk.clk_mutex);
		return 0;
	}

	/* NOTE(review): bypassctrl and sel_vco are tested as whole
	 * registers, not masked to the gpcpll fields -- verify the other
	 * bits are always zero on GM20B. */
	reg = gk20a_readl(g, trim_sys_bypassctrl_r());
	seq_printf(s, "bypassctrl = %s, ", reg ? "bypass" : "vco");
	reg = gk20a_readl(g, trim_sys_sel_vco_r());
	seq_printf(s, "sel_vco = %s, ", reg ? "vco" : "bypass");

	reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	seq_printf(s, "cfg  = 0x%x : %s : %s : %s\n", reg,
		trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled",
		trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked",
		trim_sys_gpcpll_cfg_sync_mode_v(reg) ? "sync_on" : "sync_off");

	/* decode coefficients into the resulting output frequency (kHz) */
	reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	m = trim_sys_gpcpll_coeff_mdiv_v(reg);
	n = trim_sys_gpcpll_coeff_ndiv_v(reg);
	pl = trim_sys_gpcpll_coeff_pldiv_v(reg);
	f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div(pl));
	seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
	seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
	mutex_unlock(&g->clk.clk_mutex);
	return 0;
}
864
865 static int pll_reg_open(struct inode *inode, struct file *file)
866 {
867         return single_open(file, pll_reg_show, inode->i_private);
868 }
869
/* Read-only seq_file file operations for the "pll_reg" debugfs node */
static const struct file_operations pll_reg_fops = {
        .open           = pll_reg_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};
876
/*
 * debugfs "monitor" read: measure the actual GPCCLK frequency with the
 * on-chip clock counter, rather than reporting the programmed rate.
 *
 * The counter accumulates GPCCLK pulses over a fixed window of ncycle
 * reference-clock (clk_in) cycles, so the frequency is
 * clk_in * count / ncycle (same units as clk_in).  Thermal clock
 * slowdown is temporarily disabled around the measurement so the
 * counter sees the undivided clock.
 *
 * Returns 0 with *val set to the measured frequency, -EBUSY if two
 * counter reads disagreed (measurement had not settled), or the error
 * from powering on the GPU.
 */
static int monitor_get(void *data, u64 *val)
{
        struct gk20a *g = (struct gk20a *)data;
        struct clk_gk20a *clk = &g->clk;
        u32 clk_slowdown, clk_slowdown_save;
        int err;

        u32 ncycle = 100; /* count GPCCLK for ncycle of clkin */
        u64 freq = clk->gpc_pll.clk_in;
        u32 count1, count2;

        /* Power on the GPU so clock registers are accessible */
        err = gk20a_busy(g->dev);
        if (err)
                return err;

        mutex_lock(&g->clk.clk_mutex);

        /* Disable clock slowdown during measurements */
        clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0));
        clk_slowdown = set_field(clk_slowdown_save,
                                 therm_clk_slowdown_idle_factor_m(),
                                 therm_clk_slowdown_idle_factor_disabled_f());
        gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown);
        /* read back to ensure the write reached the hardware */
        gk20a_readl(g, therm_clk_slowdown_r(0));

        /* Reset the counter, then program the window and enable it */
        gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
                     trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
        gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
                     trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
                     trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
                     trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
        /* start */

        /* It should take less than 5us to finish 100 cycle of 38.4MHz.
           But longer than 100us delay is required here. */
        gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
        udelay(200);

        /* Two reads 100us apart: if they differ, counting hadn't finished */
        count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
        udelay(100);
        count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
        freq *= trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2);
        do_div(freq, ncycle);
        *val = freq;

        /* Restore clock slowdown */
        gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save);
        mutex_unlock(&g->clk.clk_mutex);

        gk20a_idle(g->dev);

        if (count1 != count2)
                return -EBUSY;
        return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
933
934 static int clk_gm20b_debugfs_init(struct gk20a *g)
935 {
936         struct dentry *d;
937         struct gk20a_platform *platform = platform_get_drvdata(g->dev);
938
939         d = debugfs_create_file(
940                 "rate", S_IRUGO|S_IWUSR, platform->debugfs, g, &rate_fops);
941         if (!d)
942                 goto err_out;
943
944         d = debugfs_create_file(
945                 "pll_reg", S_IRUGO, platform->debugfs, g, &pll_reg_fops);
946         if (!d)
947                 goto err_out;
948
949         d = debugfs_create_file(
950                 "monitor", S_IRUGO, platform->debugfs, g, &monitor_fops);
951         if (!d)
952                 goto err_out;
953
954         return 0;
955
956 err_out:
957         pr_err("%s: Failed to make debugfs node\n", __func__);
958         debugfs_remove_recursive(platform->debugfs);
959         return -ENOMEM;
960 }
961
962 #endif /* CONFIG_DEBUG_FS */