gpu: nvgpu: Re-factor GM20b clk_slide_gpc_pll()
[linux-3.10.git] / drivers / gpu / nvgpu / gm20b / clk_gm20b.c
1 /*
2  * GM20B Clocks
3  *
4  * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/clk.h>
20 #include <linux/delay.h>        /* for mdelay */
21 #include <linux/module.h>
22 #include <linux/debugfs.h>
23 #include <linux/uaccess.h>
24 #include <linux/clk/tegra.h>
25
26 #include "gk20a/gk20a.h"
27 #include "hw_trim_gm20b.h"
28 #include "hw_timer_gm20b.h"
29 #include "hw_therm_gm20b.h"
30 #include "clk_gm20b.h"
31
32 #define gk20a_dbg_clk(fmt, arg...) \
33         gk20a_dbg(gpu_dbg_clk, fmt, ##arg)
34
35 /* from vbios PLL info table */
36 static struct pll_parms gpc_pll_params = {
37         128000,  2600000,       /* freq */
38         1300000, 2600000,       /* vco */
39         12000,   38400,         /* u */
40         1, 255,                 /* M */
41         8, 255,                 /* N */
42         1, 31,                  /* PL */
43 };
44
45 #ifdef CONFIG_DEBUG_FS
46 static int clk_gm20b_debugfs_init(struct gk20a *g);
47 #endif
48
49 /* 1:1 match between post divider settings and divisor value */
/* Convert a PL (post divider) register setting to its divisor value.
 * On GM20B the mapping is 1:1, so this is an identity function. */
static inline u32 pl_to_div(u32 pl)
{
	return pl;
}
54
/* Convert a divisor value back to the PL register setting (1:1 on GM20B). */
static inline u32 div_to_pl(u32 div)
{
	return div;
}
59
60 /* FIXME: remove after on-silicon testing */
61 #define PLDIV_GLITCHLESS 1
62
63 /* Calculate and update M/N/PL as well as pll->freq
64     ref_clk_f = clk_in_f;
65     u_f = ref_clk_f / M;
66     vco_f = u_f * N = ref_clk_f * N / M;
67     PLL output = gpc2clk = target clock frequency = vco_f / pl_to_pdiv(PL);
68     gpcclk = gpc2clk / 2; */
/*
 * Find M/N/PL producing the PLL output closest to *target_freq.
 *
 * Exhaustive search: for each PL in [low_PL, high_PL] and each M in range,
 * N is bracketed between floor and ceiling of the exact ratio and both
 * candidates (plus any in between) are scored by |output - target|.
 * On return pll->M/N/PL and pll->freq hold the winning configuration and
 * *target_freq is overwritten with the achieved frequency.
 *
 * Always returns 0; BUG()s if no combination lands inside the VCO range.
 * NOTE(review): frequencies appear to be kHz (cf. gpc_pll_params) although
 * the debug messages are labeled "MHz" — units match pll->clk_in either way.
 */
static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll,
	struct pll_parms *pll_params, u32 *target_freq, bool best_fit)
{
	u32 min_vco_f, max_vco_f;
	u32 best_M, best_N;
	u32 low_PL, high_PL, best_PL;
	u32 m, n, n2;
	u32 target_vco_f, vco_f;
	u32 ref_clk_f, target_clk_f, u_f;
	u32 delta, lwv, best_delta = ~0;	/* ~0 == "no match found yet" */
	u32 pl;

	BUG_ON(target_freq == NULL);

	gk20a_dbg_fn("request target freq %d MHz", *target_freq);

	ref_clk_f = pll->clk_in;
	target_clk_f = *target_freq;
	max_vco_f = pll_params->max_vco;
	min_vco_f = pll_params->min_vco;
	/* fallback result if the search never beats best_delta == ~0 */
	best_M = pll_params->max_M;
	best_N = pll_params->min_N;
	best_PL = pll_params->min_PL;

	/* target VCO with 2% margin; widen the cap if the target exceeds it */
	target_vco_f = target_clk_f + target_clk_f / 50;
	if (max_vco_f < target_vco_f)
		max_vco_f = target_vco_f;

	/* Set PL search boundaries. */
	high_PL = div_to_pl((max_vco_f + target_vco_f - 1) / target_vco_f);
	high_PL = min(high_PL, pll_params->max_PL);
	high_PL = max(high_PL, pll_params->min_PL);

	low_PL = div_to_pl(min_vco_f / target_vco_f);
	low_PL = min(low_PL, pll_params->max_PL);
	low_PL = max(low_PL, pll_params->min_PL);

	gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)",
			low_PL, pl_to_div(low_PL), high_PL, pl_to_div(high_PL));

	for (pl = low_PL; pl <= high_PL; pl++) {
		/* VCO frequency needed to hit the target through this PL */
		target_vco_f = target_clk_f * pl_to_div(pl);

		for (m = pll_params->min_M; m <= pll_params->max_M; m++) {
			u_f = ref_clk_f / m;

			/* u only shrinks as m grows: stop once below range */
			if (u_f < pll_params->min_u)
				break;
			if (u_f > pll_params->max_u)
				continue;

			/* bracket N: floor (n) and ceiling (n2) candidates */
			n = (target_vco_f * m) / ref_clk_f;
			n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f;

			/* n only grows with m: no larger m can work either */
			if (n > pll_params->max_N)
				break;

			for (; n <= n2; n++) {
				if (n < pll_params->min_N)
					continue;
				if (n > pll_params->max_N)
					break;

				vco_f = ref_clk_f * n / m;

				if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
					/* rounded output frequency */
					lwv = (vco_f + (pl_to_div(pl) / 2))
						/ pl_to_div(pl);
					delta = abs(lwv - target_clk_f);

					if (delta < best_delta) {
						best_delta = delta;
						best_M = m;
						best_N = n;
						best_PL = pl;

						if (best_delta == 0 ||
						    /* 0.45% for non best fit */
						    (!best_fit && (vco_f / best_delta > 218))) {
							goto found_match;
						}

						gk20a_dbg_info("delta %d @ M %d, N %d, PL %d",
							delta, m, n, pl);
					}
				}
			}
		}
	}

found_match:
	/* nothing in range at all: PLL cannot be configured for this target */
	BUG_ON(best_delta == ~0);

	if (best_fit && best_delta != 0)
		gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll",
			target_clk_f);

	pll->M = best_M;
	pll->N = best_N;
	pll->PL = best_PL;

	/* save current frequency */
	pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div(pll->PL));

	*target_freq = pll->freq;

	gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)",
		*target_freq, pll->M, pll->N, pll->PL, pl_to_div(pll->PL));

	gk20a_dbg_fn("done");

	return 0;
}
182
183 static void clk_setup_slide(struct gk20a *g, u32 clk_u)
184 {
185         u32 data, step_a, step_b;
186
187         switch (clk_u) {
188         case 12000:
189         case 12800:
190         case 13000:                     /* only on FPGA */
191                 step_a = 0x2B;
192                 step_b = 0x0B;
193                 break;
194         case 19200:
195                 step_a = 0x12;
196                 step_b = 0x08;
197                 break;
198         case 38400:
199                 step_a = 0x04;
200                 step_b = 0x05;
201                 break;
202         default:
203                 gk20a_err(dev_from_gk20a(g), "Unexpected reference rate %u kHz",
204                           clk_u);
205                 BUG();
206         }
207
208         /* setup */
209         data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
210         data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
211                         trim_sys_gpcpll_cfg2_pll_stepa_f(step_a));
212         gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
213         data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
214         data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
215                         trim_sys_gpcpll_cfg3_pll_stepb_f(step_b));
216         gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
217 }
218
/*
 * Glitchlessly move the running PLL to gpll->N via the HW dynamic ramp:
 * enter slowdown mode, latch the new NDIV, trigger the ramp, poll the
 * broadcast debug register for completion, then exit slowdown mode.
 * Caller must ensure only N differs from the current coefficients.
 * Returns 0 on success (or no-op), -ETIMEDOUT if the ramp never completed.
 */
static int clk_slide_gpc_pll(struct gk20a *g, struct pll *gpll)
{
	u32 data, coeff;
	u32 nold;
	int ramp_timeout = 500;	/* poll iterations, ~1 us apart */

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);

	/* do nothing if NDIV is same */
	if (gpll->N == nold)
		return 0;

	/* dynamic ramp setup based on update rate */
	clk_setup_slide(g, gpll->clk_in / gpll->M);

	/* pll slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* new ndiv ready for ramp */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
			trim_sys_gpcpll_coeff_ndiv_f(gpll->N));
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* dynamic ramp to new ndiv */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* poll for ramp completion (synced done bit in bcast debug reg) */
	do {
		udelay(1);
		ramp_timeout--;
		data = gk20a_readl(
			g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
		if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
			break;
	} while (ramp_timeout > 0);

	/* exit slowdown mode (always, even on timeout) */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
	/* read back to flush the posted write */
	gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());

	if (ramp_timeout <= 0) {
		gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
		return -ETIMEDOUT;
	}
	return 0;
}
284
/*
 * Program gpll->M/N/PL into the PLL while gpc2clk runs from bypass, then
 * relock. Sequence: select bypass -> exit IDDQ (first power-up) or clear
 * SYNC_MODE and disable (already running) -> write coefficients -> enable
 * -> power on lock detect if needed -> poll for lock -> set SYNC_MODE ->
 * reselect VCO. On exit the PLL is enabled, locked, and out of bypass.
 * Returns 0 on success; BUG()s (then -EBUSY) if lock never asserts.
 */
static int clk_lock_gpc_pll_under_bypass(struct gk20a *g, struct pll *gpll)
{
	u32 data, cfg, coeff, timeout;

	/* put PLL in bypass before programming it */
	data = gk20a_readl(g, trim_sys_sel_vco_r());
	data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
		trim_sys_sel_vco_gpc2clk_out_bypass_f());
	gk20a_writel(g, trim_sys_sel_vco_r(), data);

	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (trim_sys_gpcpll_cfg_iddq_v(cfg)) {
		/* get out from IDDQ (1st power up) */
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(),
				trim_sys_gpcpll_cfg_iddq_power_on_v());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
		gk20a_readl(g, trim_sys_gpcpll_cfg_r());
		udelay(5);	/* IDDQ exit settle time */
	} else {
		/* clear SYNC_MODE before disabling PLL */
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
				trim_sys_gpcpll_cfg_sync_mode_disable_f());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
		gk20a_readl(g, trim_sys_gpcpll_cfg_r());

		/* disable running PLL before changing coefficients */
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
				trim_sys_gpcpll_cfg_enable_no_f());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
		gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	}

	/* change coefficients */
	coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) |
		trim_sys_gpcpll_coeff_ndiv_f(gpll->N) |
		trim_sys_gpcpll_coeff_pldiv_f(gpll->PL);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* enable PLL after changing coefficients */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
			trim_sys_gpcpll_cfg_enable_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);

	/* power on lock detect if it is currently off */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()){
		cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(),
			trim_sys_gpcpll_cfg_enb_lckdet_power_on_f());
		gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	}

	/* wait pll lock: pll_delay usec budget, polled every 2 usec */
	timeout = g->clk.pll_delay / 2 + 1;
	do {
		cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
		if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f())
			goto pll_locked;
		udelay(2);
	} while (--timeout > 0);

	/* PLL is messed up. What can we do here? */
	BUG();
	return -EBUSY;

pll_locked:
	gk20a_dbg_clk("locked config_pll under bypass r=0x%x v=0x%x",
		trim_sys_gpcpll_cfg_r(), cfg);

	/* set SYNC_MODE for glitchless switch out of bypass */
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
			trim_sys_gpcpll_cfg_sync_mode_enable_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	gk20a_readl(g, trim_sys_gpcpll_cfg_r());

	/* put PLL back on vco */
	data = gk20a_readl(g, trim_sys_sel_vco_r());
	data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
		trim_sys_sel_vco_gpc2clk_out_vco_f());
	gk20a_writel(g, trim_sys_sel_vco_r(), data);

	return 0;
}
368
/*
 * Program the PLL to gpll_new. If M and PL are unchanged and sliding is
 * allowed, do a pure NDIV slide. Otherwise slide down to NDIV_LO, relock
 * under bypass with the new M (and, depending on PLDIV_GLITCHLESS, either
 * keep PL >= 1:2 across the switch or split the jump with the out divider),
 * then slide up to the target NDIV. Returns 0 on success or the error from
 * the final slide.
 */
static int clk_program_gpc_pll(struct gk20a *g, struct pll *gpll_new,
			int allow_slide)
{
#if PLDIV_GLITCHLESS
	bool skip_bypass;
#else
	u32 data;
#endif
	u32 cfg, coeff;
	bool can_slide;
	struct pll gpll;

	gk20a_dbg_fn("");

	/* nothing to program on FPGA/simulation */
	if (!tegra_platform_is_silicon())
		return 0;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
	gpll.N = trim_sys_gpcpll_coeff_ndiv_v(coeff);
	gpll.PL = trim_sys_gpcpll_coeff_pldiv_v(coeff);
	gpll.clk_in = gpll_new->clk_in;

	/* do NDIV slide if there is no change in M and PL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	can_slide = allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg);

	if (can_slide && (gpll_new->M == gpll.M) && (gpll_new->PL == gpll.PL))
		return clk_slide_gpc_pll(g, gpll_new);

	/* slide down to NDIV_LO (lowest N keeping VCO in range at old M) */
	if (can_slide) {
		int ret;
		gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
				      gpll.clk_in);
		ret = clk_slide_gpc_pll(g, &gpll);
		if (ret)
			return ret;
	}

#if PLDIV_GLITCHLESS
	/*
	 * Limit either FO-to-FO (path A below) or FO-to-bypass (path B below)
	 * jump to min_vco/2 by setting post divider >= 1:2.
	 */
	skip_bypass = can_slide && (gpll_new->M == gpll.M);
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	if ((skip_bypass && (gpll_new->PL < 2)) || (gpll.PL < 2)) {
		if (gpll.PL != 2) {
			coeff = set_field(coeff,
				trim_sys_gpcpll_coeff_pldiv_m(),
				trim_sys_gpcpll_coeff_pldiv_f(2));
			gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
			coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
			udelay(2);
		}
	}

	if (skip_bypass)
		goto set_pldiv; /* path A: no need to bypass */

	/* path B: bypass if either M changes or PLL is disabled */
#else
	/* split FO-to-bypass jump in halfs by setting out divider 1:2 */
	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
		trim_sys_gpc2clk_out_vcodiv_f(2));
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
	gk20a_readl(g, trim_sys_gpc2clk_out_r());
	udelay(2);
#endif
	/*
	 * Program and lock pll under bypass. On exit PLL is out of bypass,
	 * enabled, and locked. VCO is at vco_min if sliding is allowed.
	 * Otherwise it is at VCO target (and therefore last slide call below
	 * is effectively NOP). PL is preserved (not set to target) if post
	 * divider is glitchless. Otherwise it is at PL target.
	 */
	gpll = *gpll_new;
	if (allow_slide)
		gpll.N = DIV_ROUND_UP(gpll_new->M * gpc_pll_params.min_vco,
				      gpll_new->clk_in);
#if PLDIV_GLITCHLESS
	gpll.PL = (gpll_new->PL < 2) ? 2 : gpll_new->PL;
#endif
	clk_lock_gpc_pll_under_bypass(g, &gpll);
	gpll_new->enabled = true;

#if PLDIV_GLITCHLESS
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	udelay(2);

set_pldiv:
	/* coeff must be current from either path A or B */
	if (trim_sys_gpcpll_coeff_pldiv_v(coeff) != gpll_new->PL) {
		coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(),
			trim_sys_gpcpll_coeff_pldiv_f(gpll_new->PL));
		gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
	}
#else
	/* restore out divider 1:1 */
	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
		trim_sys_gpc2clk_out_vcodiv_by1_f());
	udelay(2);
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
#endif
	/* slide up to target NDIV */
	return clk_slide_gpc_pll(g, gpll_new);
}
480
481 static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
482 {
483         u32 cfg, coeff;
484         struct clk_gk20a *clk = &g->clk;
485         struct pll gpll = clk->gpc_pll;
486
487         /* slide to VCO min */
488         cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
489         if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
490                 coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
491                 gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
492                 gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
493                                       gpll.clk_in);
494                 clk_slide_gpc_pll(g, &gpll);
495         }
496
497         /* put PLL in bypass before disabling it */
498         cfg = gk20a_readl(g, trim_sys_sel_vco_r());
499         cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
500                         trim_sys_sel_vco_gpc2clk_out_bypass_f());
501         gk20a_writel(g, trim_sys_sel_vco_r(), cfg);
502
503         /* clear SYNC_MODE before disabling PLL */
504         cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
505         cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
506                         trim_sys_gpcpll_cfg_sync_mode_disable_f());
507         gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
508
509         /* disable PLL */
510         cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
511         cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
512                         trim_sys_gpcpll_cfg_enable_no_f());
513         gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
514         gk20a_readl(g, trim_sys_gpcpll_cfg_r());
515
516         clk->gpc_pll.enabled = false;
517         return 0;
518 }
519
/* No GM20B-specific reset/enable programming is required at this stage. */
static int gm20b_init_clk_reset_enable_hw(struct gk20a *g)
{
	gk20a_dbg_fn("");

	return 0;
}
525
526 struct clk *gm20b_clk_get(struct gk20a *g)
527 {
528         if (!g->clk.tegra_clk) {
529                 struct clk *clk;
530
531                 clk = clk_get_sys("tegra_gk20a", "gpu");
532                 if (IS_ERR(clk)) {
533                         gk20a_err(dev_from_gk20a(g),
534                                 "fail to get tegra gpu clk tegra_gk20a/gpu");
535                         return NULL;
536                 }
537                 g->clk.tegra_clk = clk;
538         }
539
540         return g->clk.tegra_clk;
541 }
542
543 static int gm20b_init_clk_setup_sw(struct gk20a *g)
544 {
545         struct clk_gk20a *clk = &g->clk;
546         static int initialized;
547         struct clk *ref;
548         unsigned long ref_rate;
549
550         gk20a_dbg_fn("");
551
552         if (clk->sw_ready) {
553                 gk20a_dbg_fn("skip init");
554                 return 0;
555         }
556
557         if (!gk20a_clk_get(g))
558                 return -EINVAL;
559
560         ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
561         if (IS_ERR(ref)) {
562                 gk20a_err(dev_from_gk20a(g),
563                         "failed to get GPCPLL reference clock");
564                 return -EINVAL;
565         }
566         ref_rate = clk_get_rate(ref);
567
568         clk->pll_delay = 300; /* usec */
569
570         clk->gpc_pll.id = GK20A_GPC_PLL;
571         clk->gpc_pll.clk_in = ref_rate / KHZ;
572
573         /* Initial frequency: 1/3 VCO min (low enough to be safe at Vmin) */
574         if (!initialized) {
575                 initialized = 1;
576                 clk->gpc_pll.M = 1;
577                 clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
578                                         clk->gpc_pll.clk_in);
579                 clk->gpc_pll.PL = 3;
580                 clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
581                 clk->gpc_pll.freq /= pl_to_div(clk->gpc_pll.PL);
582         }
583
584         mutex_init(&clk->clk_mutex);
585
586         clk->sw_ready = true;
587
588         gk20a_dbg_fn("done");
589         return 0;
590 }
591
592 static int gm20b_init_clk_setup_hw(struct gk20a *g)
593 {
594         u32 data;
595
596         gk20a_dbg_fn("");
597
598         /* LDIV: Div4 mode (required); both  bypass and vco ratios 1:1 */
599         data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
600         data = set_field(data,
601                         trim_sys_gpc2clk_out_sdiv14_m() |
602                         trim_sys_gpc2clk_out_vcodiv_m() |
603                         trim_sys_gpc2clk_out_bypdiv_m(),
604                         trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
605                         trim_sys_gpc2clk_out_vcodiv_by1_f() |
606                         trim_sys_gpc2clk_out_bypdiv_f(0));
607         gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
608
609         /*
610          * Clear global bypass control; PLL is still under bypass, since SEL_VCO
611          * is cleared by default.
612          */
613         data = gk20a_readl(g, trim_sys_bypassctrl_r());
614         data = set_field(data, trim_sys_bypassctrl_gpcpll_m(),
615                          trim_sys_bypassctrl_gpcpll_vco_f());
616         gk20a_writel(g, trim_sys_bypassctrl_r(), data);
617
618         /* Disable idle slow down */
619         data = gk20a_readl(g, therm_clk_slowdown_r(0));
620         data = set_field(data, therm_clk_slowdown_idle_factor_m(),
621                          therm_clk_slowdown_idle_factor_disabled_f());
622         gk20a_writel(g, therm_clk_slowdown_r(0), data);
623         gk20a_readl(g, therm_clk_slowdown_r(0));
624
625         return 0;
626 }
627
628 static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
629 {
630         struct clk_gk20a *clk = &g->clk;
631
632         if (freq > gpc_pll_params.max_freq)
633                 freq = gpc_pll_params.max_freq;
634         else if (freq < gpc_pll_params.min_freq)
635                 freq = gpc_pll_params.min_freq;
636
637         if (freq != old_freq) {
638                 /* gpc_pll.freq is changed to new value here */
639                 if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
640                                    &freq, true)) {
641                         gk20a_err(dev_from_gk20a(g),
642                                    "failed to set pll target for %d", freq);
643                         return -EINVAL;
644                 }
645         }
646         return 0;
647 }
648
649 static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq)
650 {
651         struct clk_gk20a *clk = &g->clk;
652         int err = 0;
653
654         gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq);
655
656         if ((freq == old_freq) && clk->gpc_pll.enabled)
657                 return 0;
658
659         /* change frequency only if power is on */
660         if (g->clk.clk_hw_on) {
661                 err = clk_program_gpc_pll(g, &clk->gpc_pll, 1);
662                 if (err)
663                         err = clk_program_gpc_pll(g, &clk->gpc_pll, 0);
664         }
665
666         /* Just report error but not restore PLL since dvfs could already change
667             voltage even when it returns error. */
668         if (err)
669                 gk20a_err(dev_from_gk20a(g),
670                         "failed to set pll to %d", freq);
671         return err;
672 }
673
674 static int gm20b_clk_export_set_rate(void *data, unsigned long *rate)
675 {
676         u32 old_freq;
677         int ret = -ENODATA;
678         struct gk20a *g = data;
679         struct clk_gk20a *clk = &g->clk;
680
681         if (rate) {
682                 mutex_lock(&clk->clk_mutex);
683                 old_freq = clk->gpc_pll.freq;
684                 ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq);
685                 if (!ret && clk->gpc_pll.enabled)
686                         ret = set_pll_freq(g, clk->gpc_pll.freq, old_freq);
687                 if (!ret)
688                         *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
689                 mutex_unlock(&clk->clk_mutex);
690         }
691         return ret;
692 }
693
694 static int gm20b_clk_export_enable(void *data)
695 {
696         int ret;
697         struct gk20a *g = data;
698         struct clk_gk20a *clk = &g->clk;
699
700         mutex_lock(&clk->clk_mutex);
701         ret = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
702         mutex_unlock(&clk->clk_mutex);
703         return ret;
704 }
705
706 static void gm20b_clk_export_disable(void *data)
707 {
708         struct gk20a *g = data;
709         struct clk_gk20a *clk = &g->clk;
710
711         mutex_lock(&clk->clk_mutex);
712         if (g->clk.clk_hw_on)
713                 clk_disable_gpcpll(g, 1);
714         mutex_unlock(&clk->clk_mutex);
715 }
716
717 static void gm20b_clk_export_init(void *data, unsigned long *rate, bool *state)
718 {
719         struct gk20a *g = data;
720         struct clk_gk20a *clk = &g->clk;
721
722         mutex_lock(&clk->clk_mutex);
723         if (state)
724                 *state = clk->gpc_pll.enabled;
725         if (rate)
726                 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
727         mutex_unlock(&clk->clk_mutex);
728 }
729
730 static struct tegra_clk_export_ops gm20b_clk_export_ops = {
731         .init = gm20b_clk_export_init,
732         .enable = gm20b_clk_export_enable,
733         .disable = gm20b_clk_export_disable,
734         .set_rate = gm20b_clk_export_set_rate,
735 };
736
737 static int gm20b_clk_register_export_ops(struct gk20a *g)
738 {
739         int ret;
740         struct clk *c;
741
742         if (gm20b_clk_export_ops.data)
743                 return 0;
744
745         gm20b_clk_export_ops.data = (void *)g;
746         c = g->clk.tegra_clk;
747         if (!c || !clk_get_parent(c))
748                 return -ENOSYS;
749
750         ret = tegra_clk_register_export_ops(clk_get_parent(c),
751                                             &gm20b_clk_export_ops);
752
753         return ret;
754 }
755
756 static int gm20b_init_clk_support(struct gk20a *g)
757 {
758         struct clk_gk20a *clk = &g->clk;
759         u32 err;
760
761         gk20a_dbg_fn("");
762
763         clk->g = g;
764
765         err = gm20b_init_clk_reset_enable_hw(g);
766         if (err)
767                 return err;
768
769         err = gm20b_init_clk_setup_sw(g);
770         if (err)
771                 return err;
772
773         mutex_lock(&clk->clk_mutex);
774         clk->clk_hw_on = true;
775
776         err = gm20b_init_clk_setup_hw(g);
777         mutex_unlock(&clk->clk_mutex);
778         if (err)
779                 return err;
780
781         err = gm20b_clk_register_export_ops(g);
782         if (err)
783                 return err;
784
785         /* FIXME: this effectively prevents host level clock gating */
786         err = clk_enable(g->clk.tegra_clk);
787         if (err)
788                 return err;
789
790         /* The prev call may not enable PLL if gbus is unbalanced - force it */
791         mutex_lock(&clk->clk_mutex);
792         err = set_pll_freq(g, clk->gpc_pll.freq, clk->gpc_pll.freq);
793         mutex_unlock(&clk->clk_mutex);
794         if (err)
795                 return err;
796
797 #ifdef CONFIG_DEBUG_FS
798         if (!clk->debugfs_set) {
799                 if (!clk_gm20b_debugfs_init(g))
800                         clk->debugfs_set = true;
801         }
802 #endif
803         return err;
804 }
805
806 static int gm20b_suspend_clk_support(struct gk20a *g)
807 {
808         int ret;
809
810         clk_disable(g->clk.tegra_clk);
811
812         /* The prev call may not disable PLL if gbus is unbalanced - force it */
813         mutex_lock(&g->clk.clk_mutex);
814         ret = clk_disable_gpcpll(g, 1);
815         g->clk.clk_hw_on = false;
816         mutex_unlock(&g->clk.clk_mutex);
817         return ret;
818 }
819
/* Hook the GM20B clock init/suspend entry points into the per-GPU ops table. */
void gm20b_init_clk_ops(struct gpu_ops *gops)
{
	gops->clk.init_clk_support = gm20b_init_clk_support;
	gops->clk.suspend_clk_support = gm20b_suspend_clk_support;
}
825
826 #ifdef CONFIG_DEBUG_FS
827
828 static int rate_get(void *data, u64 *val)
829 {
830         struct gk20a *g = (struct gk20a *)data;
831         *val = (u64)gk20a_clk_get_rate(g);
832         return 0;
833 }
834 static int rate_set(void *data, u64 val)
835 {
836         struct gk20a *g = (struct gk20a *)data;
837         return gk20a_clk_set_rate(g, (u32)val);
838 }
839 DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
840
841 static int pll_reg_show(struct seq_file *s, void *data)
842 {
843         struct gk20a *g = s->private;
844         u32 reg, m, n, pl, f;
845
846         mutex_lock(&g->clk.clk_mutex);
847         if (!g->clk.clk_hw_on) {
848                 seq_printf(s, "gk20a powered down - no access to registers\n");
849                 mutex_unlock(&g->clk.clk_mutex);
850                 return 0;
851         }
852
853         reg = gk20a_readl(g, trim_sys_bypassctrl_r());
854         seq_printf(s, "bypassctrl = %s, ", reg ? "bypass" : "vco");
855         reg = gk20a_readl(g, trim_sys_sel_vco_r());
856         seq_printf(s, "sel_vco = %s, ", reg ? "vco" : "bypass");
857
858         reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
859         seq_printf(s, "cfg  = 0x%x : %s : %s : %s\n", reg,
860                 trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled",
861                 trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked",
862                 trim_sys_gpcpll_cfg_sync_mode_v(reg) ? "sync_on" : "sync_off");
863
864         reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
865         m = trim_sys_gpcpll_coeff_mdiv_v(reg);
866         n = trim_sys_gpcpll_coeff_ndiv_v(reg);
867         pl = trim_sys_gpcpll_coeff_pldiv_v(reg);
868         f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div(pl));
869         seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
870         seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
871         mutex_unlock(&g->clk.clk_mutex);
872         return 0;
873 }
874
/* debugfs open: bind pll_reg_show to this device's gk20a instance. */
static int pll_reg_open(struct inode *inode, struct file *file)
{
        return single_open(file, pll_reg_show, inode->i_private);
}
879
/* Read-only seq_file ops for the "pll_reg" debugfs node. */
static const struct file_operations pll_reg_fops = {
        .open           = pll_reg_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};
886
887 static int pll_reg_raw_show(struct seq_file *s, void *data)
888 {
889         struct gk20a *g = s->private;
890         u32 reg;
891
892         mutex_lock(&g->clk.clk_mutex);
893         if (!g->clk.clk_hw_on) {
894                 seq_puts(s, "gk20a powered down - no access to registers\n");
895                 mutex_unlock(&g->clk.clk_mutex);
896                 return 0;
897         }
898
899         seq_puts(s, "GPCPLL REGISTERS:\n");
900         for (reg = trim_sys_gpcpll_cfg_r(); reg <= trim_sys_gpcpll_dvfs2_r();
901               reg += sizeof(u32))
902                 seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
903
904         seq_puts(s, "\nGPC CLK OUT REGISTERS:\n");
905
906         reg = trim_sys_sel_vco_r();
907         seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
908         reg = trim_sys_gpc2clk_out_r();
909         seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
910         reg = trim_sys_bypassctrl_r();
911         seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
912
913         mutex_unlock(&g->clk.clk_mutex);
914         return 0;
915 }
916
/* debugfs open: bind pll_reg_raw_show to this device's gk20a instance. */
static int pll_reg_raw_open(struct inode *inode, struct file *file)
{
        return single_open(file, pll_reg_raw_show, inode->i_private);
}
921
922 static ssize_t pll_reg_raw_write(struct file *file,
923         const char __user *userbuf, size_t count, loff_t *ppos)
924 {
925         struct gk20a *g = file->f_path.dentry->d_inode->i_private;
926         char buf[80];
927         u32 reg, val;
928
929         if (sizeof(buf) <= count)
930                 return -EINVAL;
931
932         if (copy_from_user(buf, userbuf, count))
933                 return -EFAULT;
934
935         /* terminate buffer and trim - white spaces may be appended
936          *  at the end when invoked from shell command line */
937         buf[count] = '\0';
938         strim(buf);
939
940         if (sscanf(buf, "[0x%x] = 0x%x", &reg, &val) != 2)
941                 return -EINVAL;
942
943         if (((reg < trim_sys_gpcpll_cfg_r()) ||
944             (reg > trim_sys_gpcpll_dvfs2_r())) &&
945             (reg != trim_sys_sel_vco_r()) &&
946             (reg != trim_sys_gpc2clk_out_r()) &&
947             (reg != trim_sys_bypassctrl_r()))
948                 return -EPERM;
949
950         mutex_lock(&g->clk.clk_mutex);
951         if (!g->clk.clk_hw_on) {
952                 mutex_unlock(&g->clk.clk_mutex);
953                 return -EBUSY;
954         }
955         gk20a_writel(g, reg, val);
956         mutex_unlock(&g->clk.clk_mutex);
957         return count;
958 }
959
/* Read/write seq_file ops for the "pll_reg_raw" debugfs node. */
static const struct file_operations pll_reg_raw_fops = {
        .open           = pll_reg_raw_open,
        .read           = seq_read,
        .write          = pll_reg_raw_write,
        .llseek         = seq_lseek,
        .release        = single_release,
};
967
/*
 * debugfs "monitor" read: measure the actual GPC clock frequency with
 * the hardware clock counter and return it (in clk_in units, kHz based
 * on gpc_pll.clk_in) through *val.
 *
 * The counter is programmed to count GPCCLK cycles over a fixed window
 * of 'ncycle' reference clock cycles; the frequency is then
 * clk_in * count / ncycle. Returns -EBUSY if the two back-to-back
 * counter reads disagree (measurement still in flight), 0 otherwise.
 */
static int monitor_get(void *data, u64 *val)
{
        struct gk20a *g = (struct gk20a *)data;
        struct clk_gk20a *clk = &g->clk;
        u32 clk_slowdown, clk_slowdown_save;
        int err;

        u32 ncycle = 100; /* count GPCCLK for ncycle of clkin */
        u64 freq = clk->gpc_pll.clk_in;
        u32 count1, count2;

        /* power the GPU up for register access */
        err = gk20a_busy(g->dev);
        if (err)
                return err;

        mutex_lock(&g->clk.clk_mutex);

        /* Disable clock slowdown during measurements */
        clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0));
        clk_slowdown = set_field(clk_slowdown_save,
                                 therm_clk_slowdown_idle_factor_m(),
                                 therm_clk_slowdown_idle_factor_disabled_f());
        gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown);
        /* read back to ensure the write has landed before counting */
        gk20a_readl(g, therm_clk_slowdown_r(0));

        /* reset the counter, then arm it for an ncycle-wide window */
        gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
                     trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
        gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
                     trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
                     trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
                     trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
        /* start */

        /* It should take less than 5us to finish 100 cycle of 38.4MHz.
           But longer than 100us delay is required here. */
        gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
        udelay(200);

        /* read twice 100us apart; equal values mean the count is final */
        count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
        udelay(100);
        count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
        freq *= trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2);
        do_div(freq, ncycle);
        *val = freq;

        /* Restore clock slowdown */
        gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save);
        mutex_unlock(&g->clk.clk_mutex);

        gk20a_idle(g->dev);

        if (count1 != count2)
                return -EBUSY;
        return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
1024
/*
 * Create the GM20B clock debugfs nodes under the platform debugfs
 * directory: "rate" (rw), "pll_reg", "pll_reg_raw" and "monitor" (ro).
 * Returns 0 on success, -ENOMEM if any node could not be created.
 */
static int clk_gm20b_debugfs_init(struct gk20a *g)
{
        struct dentry *d;
        struct gk20a_platform *platform = platform_get_drvdata(g->dev);

        d = debugfs_create_file(
                "rate", S_IRUGO|S_IWUSR, platform->debugfs, g, &rate_fops);
        if (!d)
                goto err_out;

        d = debugfs_create_file(
                "pll_reg", S_IRUGO, platform->debugfs, g, &pll_reg_fops);
        if (!d)
                goto err_out;

        d = debugfs_create_file("pll_reg_raw",
                S_IRUGO, platform->debugfs, g, &pll_reg_raw_fops);
        if (!d)
                goto err_out;

        d = debugfs_create_file(
                "monitor", S_IRUGO, platform->debugfs, g, &monitor_fops);
        if (!d)
                goto err_out;

        return 0;

err_out:
        pr_err("%s: Failed to make debugfs node\n", __func__);
        /* NOTE(review): this removes the ENTIRE platform debugfs dir,
         * including nodes created by other parts of the driver - confirm
         * this wholesale teardown on partial failure is intended. */
        debugfs_remove_recursive(platform->debugfs);
        return -ENOMEM;
}
1057
1058 #endif /* CONFIG_DEBUG_FS */