gpu: nvgpu: Support GPCPLL dynamic ramp in NA mode
[linux-3.10.git] / drivers / gpu / nvgpu / gm20b / clk_gm20b.c
1 /*
2  * GM20B Clocks
3  *
4  * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/clk.h>
20 #include <linux/delay.h>        /* for mdelay */
21 #include <linux/module.h>
22 #include <linux/debugfs.h>
23 #include <linux/uaccess.h>
24 #include <linux/clk/tegra.h>
25 #include <linux/tegra-fuse.h>
26
27 #include "gk20a/gk20a.h"
28 #include "hw_trim_gm20b.h"
29 #include "hw_timer_gm20b.h"
30 #include "hw_therm_gm20b.h"
31 #include "clk_gm20b.h"
32
#define ALLOW_NON_CALIBRATED_NA_MODE    1

#define gk20a_dbg_clk(fmt, arg...) \
        gk20a_dbg(gpu_dbg_clk, fmt, ##arg)

/* Signed fixed-point field widths and control bits for NA/DVFS detection */
#define DFS_DET_RANGE   6       /* -2^6 ... 2^6-1 */
#define SDM_DIN_RANGE   12      /* -2^12 ... 2^12-1 */
#define DFS_EXT_CAL_EN  BIT(9)
#define DFS_EXT_STROBE  BIT(16)

#define BOOT_GPU_UV     1000000 /* gpu rail boot voltage 1.0V */
#define ADC_SLOPE_UV    10000   /* default ADC detection slope 10mV */
45
/* GPCPLL h/w limits; rates are in kHz (38400 matches the 38.4 MHz ref) */
static struct pll_parms gpc_pll_params = {
        128000,  2600000,       /* freq: min/max output */
        1300000, 2600000,       /* vco: min/max VCO */
        12000,   38400,         /* u: min/max update rate (ref / M) */
        1, 255,                 /* M: reference divider range */
        8, 255,                 /* N: feedback divider range */
        1, 31,                  /* PL: post divider range */
        -58700, 86789,          /* DFS_COEFF slope/offset (/1000 scaled) */
};
55
56 #ifdef CONFIG_DEBUG_FS
57 static int clk_gm20b_debugfs_init(struct gk20a *g);
58 #endif
59 static void clk_setup_slide(struct gk20a *g, u32 clk_u);
60
/*
 * Read trim_sys_<addr_func>_r() and print its address and value.
 * Expects locals "addr" and "data" in the calling scope.
 */
#define DUMP_REG(addr_func) \
do {                                                                    \
        addr = trim_sys_##addr_func##_r();                              \
        data = gk20a_readl(g, addr);                                    \
        pr_info(#addr_func "[0x%x] = 0x%x\n", addr, data);              \
} while (0)
67
/* Dump s/w coefficients and key GPCPLL registers (debug aid on lock fail) */
static void dump_gpc_pll(struct gk20a *g, struct pll *gpll, u32 last_cfg)
{
        u32 addr, data;         /* used implicitly by DUMP_REG() */

        pr_info("**** GPCPLL DUMP ****");
        pr_info("gpcpll s/w M=%u N=%u P=%u\n", gpll->M, gpll->N, gpll->PL);
        pr_info("gpcpll_cfg_last = 0x%x\n", last_cfg);
        DUMP_REG(gpcpll_cfg);
        DUMP_REG(gpcpll_coeff);
        DUMP_REG(sel_vco);
        pr_info("\n");
}
80
81 /* 1:1 match between post divider settings and divisor value */
82 static inline u32 pl_to_div(u32 pl)
83 {
84         return pl;
85 }
86
87 static inline u32 div_to_pl(u32 div)
88 {
89         return div;
90 }
91
92 #define PLDIV_GLITCHLESS 1
93
94 #if PLDIV_GLITCHLESS
/*
 * Post divider transition is glitchless only if there is common "1" in binary
 * representation of old and new settings.
 */
/*
 * Return an interim post divider setting to insert between old_pl and
 * new_pl, or 0 if the direct switch is already glitchless (the two
 * settings share a "1" bit in their binary representations).
 */
static u32 get_interim_pldiv(u32 old_pl, u32 new_pl)
{
        u32 pl;

        if (old_pl & new_pl)
                return 0;

        /* Build candidates that share a bit with both settings */
        pl = old_pl | BIT(ffs(new_pl) - 1);     /* pl never 0 */
        new_pl |= BIT(ffs(old_pl) - 1);

        /* Pick the smaller divider, keeping output rate higher */
        return min(pl, new_pl);
}
111 #endif
112
/* Calculate and update M/N/PL as well as pll->freq
    ref_clk_f = clk_in_f;
    u_f = ref_clk_f / M;
    vco_f = u_f * N = ref_clk_f * N / M;
    PLL output = gpc2clk = target clock frequency = vco_f / pl_to_pdiv(PL);
    gpcclk = gpc2clk / 2;

   Exhaustively searches (PL, M, N) combinations within pll_params limits
   for the output closest to *target_freq; on return *target_freq holds
   the frequency actually achieved. Always returns 0. */
static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll,
        struct pll_parms *pll_params, u32 *target_freq, bool best_fit)
{
        u32 min_vco_f, max_vco_f;
        u32 best_M, best_N;
        u32 low_PL, high_PL, best_PL;
        u32 m, n, n2;
        u32 target_vco_f, vco_f;
        u32 ref_clk_f, target_clk_f, u_f;
        u32 delta, lwv, best_delta = ~0;
        u32 pl;

        BUG_ON(target_freq == NULL);

        gk20a_dbg_fn("request target freq %d MHz", *target_freq);

        ref_clk_f = pll->clk_in;
        target_clk_f = *target_freq;
        max_vco_f = pll_params->max_vco;
        min_vco_f = pll_params->min_vco;
        best_M = pll_params->max_M;
        best_N = pll_params->min_N;
        best_PL = pll_params->min_PL;

        /* Aim VCO 2% above target so integer rounding cannot undershoot */
        target_vco_f = target_clk_f + target_clk_f / 50;
        if (max_vco_f < target_vco_f)
                max_vco_f = target_vco_f;

        /* Set PL search boundaries. */
        high_PL = div_to_pl((max_vco_f + target_vco_f - 1) / target_vco_f);
        high_PL = min(high_PL, pll_params->max_PL);
        high_PL = max(high_PL, pll_params->min_PL);

        low_PL = div_to_pl(min_vco_f / target_vco_f);
        low_PL = min(low_PL, pll_params->max_PL);
        low_PL = max(low_PL, pll_params->min_PL);

        gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)",
                        low_PL, pl_to_div(low_PL), high_PL, pl_to_div(high_PL));

        for (pl = low_PL; pl <= high_PL; pl++) {
                target_vco_f = target_clk_f * pl_to_div(pl);

                for (m = pll_params->min_M; m <= pll_params->max_M; m++) {
                        u_f = ref_clk_f / m;

                        /* u_f only shrinks as m grows - stop at lower limit */
                        if (u_f < pll_params->min_u)
                                break;
                        if (u_f > pll_params->max_u)
                                continue;

                        /* Bracket N between floor and ceiling for target VCO */
                        n = (target_vco_f * m) / ref_clk_f;
                        n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f;

                        if (n > pll_params->max_N)
                                break;

                        for (; n <= n2; n++) {
                                if (n < pll_params->min_N)
                                        continue;
                                if (n > pll_params->max_N)
                                        break;

                                vco_f = ref_clk_f * n / m;

                                if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
                                        /* Rounded output for this (m, n, pl) */
                                        lwv = (vco_f + (pl_to_div(pl) / 2))
                                                / pl_to_div(pl);
                                        delta = abs(lwv - target_clk_f);

                                        if (delta < best_delta) {
                                                best_delta = delta;
                                                best_M = m;
                                                best_N = n;
                                                best_PL = pl;

                                                if (best_delta == 0 ||
                                                    /* 0.45% for non best fit */
                                                    (!best_fit && (vco_f / best_delta > 218))) {
                                                        goto found_match;
                                                }

                                                gk20a_dbg_info("delta %d @ M %d, N %d, PL %d",
                                                        delta, m, n, pl);
                                        }
                                }
                        }
                }
        }

found_match:
        BUG_ON(best_delta == ~0);

        if (best_fit && best_delta != 0)
                gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll",
                        target_clk_f);

        pll->M = best_M;
        pll->N = best_N;
        pll->PL = best_PL;

        /* save current frequency */
        pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div(pll->PL));

        *target_freq = pll->freq;

        gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)",
                *target_freq, pll->M, pll->N, pll->PL, pl_to_div(pll->PL));

        gk20a_dbg_fn("done");

        return 0;
}
232
233 /* GPCPLL NA/DVFS mode methods */
234
/*
 * Read ADC characteristic parameters from fuses.
 * Determine calibration settings.
 */
239 static int clk_config_calibration_params(struct gk20a *g)
240 {
241         int slope, offs;
242         struct pll_parms *p = &gpc_pll_params;
243
244         if (!tegra_fuse_calib_gpcpll_get_adc(&slope, &offs)) {
245                 p->uvdet_slope = slope;
246                 p->uvdet_offs = offs;
247         }
248
249         if (!p->uvdet_slope || !p->uvdet_offs) {
250                 /*
251                  * If ADC conversion slope/offset parameters are not fused
252                  * (non-production config), report error, but allow to use
253                  * boot internal calibration with default slope.
254                  */
255                 gk20a_err(dev_from_gk20a(g), "ADC coeff are not fused\n");
256                 return -EINVAL;
257         }
258         return 0;
259 }
260
261 /*
262  * Determine DFS_COEFF for the requested voltage. Always select external
263  * calibration override equal to the voltage, and set maximum detection
264  * limit "0" (to make sure that PLL output remains under F/V curve when
265  * voltage increases).
266  */
static void clk_config_dvfs_detection(int mv, struct na_dvfs *d)
{
        u32 coeff, coeff_max;
        struct pll_parms *p = &gpc_pll_params;

        /* Clamp coefficient to the width of the DFS_COEFF register field */
        coeff_max = trim_sys_gpcpll_dvfs0_dfs_coeff_v(
                trim_sys_gpcpll_dvfs0_dfs_coeff_m());
        /* coeff_slope/coeff_offs are stored x1000 - two rounded divisions */
        coeff = DIV_ROUND_CLOSEST(mv * p->coeff_slope, 1000) + p->coeff_offs;
        coeff = DIV_ROUND_CLOSEST(coeff, 1000);
        coeff = min(coeff, coeff_max);
        d->dfs_coeff = coeff;

        /* External calibration code for the requested voltage (mv in mV) */
        d->dfs_ext_cal = DIV_ROUND_CLOSEST(mv * 1000 - p->uvdet_offs,
                                           p->uvdet_slope);
        BUG_ON(abs(d->dfs_ext_cal) >= (1 << DFS_DET_RANGE));
        /* Voltage actually represented by the (rounded) calibration code */
        d->uv_cal = p->uvdet_offs + d->dfs_ext_cal * p->uvdet_slope;
        d->dfs_det_max = 0;     /* max limit 0: stay under F/V curve */
}
285
286 /*
287  * Solve equation for integer and fractional part of the effective NDIV:
288  *
289  * n_eff = n_int + 1/2 + SDM_DIN / 2^(SDM_DIN_RANGE + 1) +
290  * DVFS_COEFF * DVFS_DET_DELTA / 2^DFS_DET_RANGE
291  *
292  * The SDM_DIN LSB is finally shifted out, since it is not accessible by s/w.
293  */
static void clk_config_dvfs_ndiv(int mv, u32 n_eff, struct na_dvfs *d)
{
        int n, det_delta;
        u32 rem, rem_range;
        struct pll_parms *p = &gpc_pll_params;

        /*
         * Voltage delta from calibration point, clamped at dfs_det_max,
         * then converted to detection units via coefficient and slope.
         */
        det_delta = (mv * 1000 - d->uv_cal);
        det_delta = min(det_delta, d->dfs_det_max * p->uvdet_slope);
        det_delta = det_delta * d->dfs_coeff;
        det_delta = DIV_ROUND_CLOSEST(det_delta, p->uvdet_slope);

        /* Effective NDIV in 2^DFS_DET_RANGE fixed point, DFS part removed */
        n = (int)(n_eff << DFS_DET_RANGE) - det_delta;
        BUG_ON((n < 0) || (n > (p->max_N << DFS_DET_RANGE)));
        d->n_int = ((u32)n) >> DFS_DET_RANGE;

        /* Scale fractional remainder to SDM_DIN units, centered at -0.5 */
        rem = ((u32)n) & ((1 << DFS_DET_RANGE) - 1);
        rem_range = SDM_DIN_RANGE + 1 - DFS_DET_RANGE;
        d->sdm_din = (rem << rem_range) - (1 << SDM_DIN_RANGE);
        /* SDM_DIN LSB is not accessible by s/w: shift out, keep 8 bits */
        d->sdm_din = (d->sdm_din >> BITS_PER_BYTE) & 0xff;
}
314
315 /* Voltage dependent configuration */
316 static void clk_config_dvfs(struct gk20a *g, struct pll *gpll)
317 {
318         struct na_dvfs *d = &gpll->dvfs;
319
320         d->mv = tegra_dvfs_predict_millivolts_t(
321                         clk_get_parent(g->clk.tegra_clk),
322                         rate_gpc2clk_to_gpu(gpll->freq));
323         clk_config_dvfs_detection(d->mv, d);
324         clk_config_dvfs_ndiv(d->mv, gpll->N, d);
325 }
326
327 /* Update DVFS detection settings in flight */
/* Update DFS coefficient while PLL is running, bracketed by ext strobe */
static void clk_set_dfs_coeff(struct gk20a *g, u32 dfs_coeff)
{
        /* Assert external strobe before touching the coefficient */
        u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
        data |= DFS_EXT_STROBE;
        gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);

        data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
        data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_coeff_m(),
                trim_sys_gpcpll_dvfs0_dfs_coeff_f(dfs_coeff));
        gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data);

        /* Let the write settle, then release the strobe */
        data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
        udelay(1);
        data &= ~DFS_EXT_STROBE;
        gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
}
344
/* Update DFS max detection limit in flight, bracketed by ext strobe */
static void __maybe_unused clk_set_dfs_det_max(struct gk20a *g, u32 dfs_det_max)
{
        /* Assert external strobe before touching the limit */
        u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
        data |= DFS_EXT_STROBE;
        gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);

        data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
        data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_det_max_m(),
                trim_sys_gpcpll_dvfs0_dfs_det_max_f(dfs_det_max));
        gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data);

        /* Let the write settle, then release the strobe */
        data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
        udelay(1);
        data &= ~DFS_EXT_STROBE;
        gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);
}
361
/* Program external calibration code and enable the external override */
static void clk_set_dfs_ext_cal(struct gk20a *g, u32 dfs_det_cal)
{
        u32 data;

        /* Replace calibration code in low DFS_DET_RANGE+1 bits (sign incl.) */
        data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
        data &= ~(BIT(DFS_DET_RANGE + 1) - 1);
        data |= dfs_det_cal;
        gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);

        /* Enable external calibration only if not already enabled */
        data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
        udelay(1);
        if (~trim_sys_gpcpll_dvfs1_dfs_ctrl_v(data) & DFS_EXT_CAL_EN) {
                data = set_field(data, trim_sys_gpcpll_dvfs1_dfs_ctrl_m(),
                        trim_sys_gpcpll_dvfs1_dfs_ctrl_f(DFS_EXT_CAL_EN));
                gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
        }
}
379
/* Program full DVFS detection configuration (coeff, max limit, ext cal) */
static void clk_setup_dvfs_detection(struct gk20a *g, struct pll *gpll)
{
        struct na_dvfs *d = &gpll->dvfs;

        /* Assert external strobe while coeff and max limit are updated */
        u32 data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
        data |= DFS_EXT_STROBE;
        gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);

        data = gk20a_readl(g, trim_sys_gpcpll_dvfs0_r());
        data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_coeff_m(),
                trim_sys_gpcpll_dvfs0_dfs_coeff_f(d->dfs_coeff));
        data = set_field(data, trim_sys_gpcpll_dvfs0_dfs_det_max_m(),
                trim_sys_gpcpll_dvfs0_dfs_det_max_f(d->dfs_det_max));
        gk20a_writel(g, trim_sys_gpcpll_dvfs0_r(), data);

        /* Let the write settle, then release the strobe */
        data = gk20a_readl(g, trim_gpc_bcast_gpcpll_dvfs2_r());
        udelay(1);
        data &= ~DFS_EXT_STROBE;
        gk20a_writel(g, trim_gpc_bcast_gpcpll_dvfs2_r(), data);

        clk_set_dfs_ext_cal(g, d->dfs_ext_cal);
}
402
403 /* Enable NA/DVFS mode */
/*
 * Enable NA/DVFS mode: turn on DFS, exit IDDQ, set up the dynamic ramp,
 * and either use fused/known ADC calibration or run internal boot-time
 * calibration to derive the ADC offset from the default slope.
 * Returns 0 on success, -ETIMEDOUT if internal calibration never completes.
 * NOTE(review): "enbale" is a pre-existing typo, kept so callers still link.
 */
static int clk_enbale_pll_dvfs(struct gk20a *g)
{
        u32 data;
        int delay = 5;  /* use for iddq exit delay & calib timeout */
        struct pll_parms *p = &gpc_pll_params;
        bool calibrated = p->uvdet_slope && p->uvdet_offs;

        /* FIXME: Set VCO_CTRL */

        /* Enable NA DVFS */
        data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
        data |= trim_sys_gpcpll_dvfs1_en_dfs_m();
        gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);

        /*
         * If calibration parameters are known (either from fuses, or from
         * internal calibration on boot) - use them. Internal calibration is
         * started anyway; it will complete, but results will not be used.
         */
        if (calibrated) {
                data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
                data |= trim_sys_gpcpll_dvfs1_en_dfs_cal_m();
                gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);
        }

        /* Exit IDDQ mode */
        data = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        data = set_field(data, trim_sys_gpcpll_cfg_iddq_m(),
                         trim_sys_gpcpll_cfg_iddq_power_on_v());
        gk20a_writel(g, trim_sys_gpcpll_cfg_r(), data);
        gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        udelay(delay);

        /*
         * Dynamic ramp setup based on update rate, which in DVFS mode on GM20b
         * is always 38.4 MHz, the same as reference clock rate.
         */
        clk_setup_slide(g, g->clk.gpc_pll.clk_in);

        if (calibrated)
                return 0;

        /*
         * If calibration parameters are not fused, start internal calibration,
         * wait for completion, and use results along with default slope to
         * calculate ADC offset during boot.
         */
        data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
        data |= trim_sys_gpcpll_dvfs1_en_dfs_cal_m();
        gk20a_writel(g, trim_sys_gpcpll_dvfs1_r(), data);

        /* Wait for internal calibration done (spec < 2us). */
        do {
                data = gk20a_readl(g, trim_sys_gpcpll_dvfs1_r());
                if (trim_sys_gpcpll_dvfs1_dfs_cal_done_v(data))
                        break;
                udelay(1);
                delay--;
        } while (delay > 0);

        if (delay <= 0) {
                gk20a_err(dev_from_gk20a(g), "GPCPLL calibration timeout");
                return -ETIMEDOUT;
        }

        /* Derive ADC offset from calibration readout and default slope */
        data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
        data = trim_sys_gpcpll_cfg3_dfs_testout_v(data);
        p->uvdet_offs = BOOT_GPU_UV - data * ADC_SLOPE_UV;
        p->uvdet_slope = ADC_SLOPE_UV;
        return 0;
}
475
476 /* GPCPLL slide methods */
477 static void clk_setup_slide(struct gk20a *g, u32 clk_u)
478 {
479         u32 data, step_a, step_b;
480
481         switch (clk_u) {
482         case 12000:
483         case 12800:
484         case 13000:                     /* only on FPGA */
485                 step_a = 0x2B;
486                 step_b = 0x0B;
487                 break;
488         case 19200:
489                 step_a = 0x12;
490                 step_b = 0x08;
491                 break;
492         case 38400:
493                 step_a = 0x04;
494                 step_b = 0x05;
495                 break;
496         default:
497                 gk20a_err(dev_from_gk20a(g), "Unexpected reference rate %u kHz",
498                           clk_u);
499                 BUG();
500         }
501
502         /* setup */
503         data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
504         data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
505                         trim_sys_gpcpll_cfg2_pll_stepa_f(step_a));
506         gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
507         data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
508         data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
509                         trim_sys_gpcpll_cfg3_pll_stepb_f(step_b));
510         gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
511 }
512
/*
 * Dynamically ramp the running PLL to the new NDIV (and, in DVFS mode,
 * SDM_DIN) setting without re-locking. Returns 0 on success, -ETIMEDOUT
 * if the ramp done status never asserts.
 */
static int clk_slide_gpc_pll(struct gk20a *g, struct pll *gpll)
{
        u32 data, coeff;
        u32 nold, sdm_old;
        int ramp_timeout = 500;

        /* get old coefficients */
        coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
        nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);

        /* do nothing if NDIV is same */
        if (gpll->mode == GPC_PLL_MODE_DVFS) {
                /* in DVFS mode check both integer and fraction */
                coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
                sdm_old = trim_sys_gpcpll_cfg2_sdm_din_v(coeff);
                if ((gpll->dvfs.n_int == nold) &&
                    (gpll->dvfs.sdm_din == sdm_old))
                        return 0;
        } else {
                if (gpll->N == nold)
                        return 0;

                /* dynamic ramp setup based on update rate */
                clk_setup_slide(g, gpll->clk_in / gpll->M);
        }

        /* pll slowdown mode */
        data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
        data = set_field(data,
                        trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
                        trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
        gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

        /* new ndiv ready for ramp */
        if (gpll->mode == GPC_PLL_MODE_DVFS) {
                /* in DVFS mode SDM is updated via "new" field */
                coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
                coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_new_m(),
                        trim_sys_gpcpll_cfg2_sdm_din_new_f(gpll->dvfs.sdm_din));
                gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff);

                coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
                coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
                        trim_sys_gpcpll_coeff_ndiv_f(gpll->dvfs.n_int));
                udelay(1);
                gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
        } else {
                coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
                coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
                                trim_sys_gpcpll_coeff_ndiv_f(gpll->N));
                udelay(1);
                gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
        }

        /* dynamic ramp to new ndiv */
        data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
        data = set_field(data,
                        trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
                        trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
        udelay(1);
        gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

        /* poll synced ramp-done status (up to ~500us) */
        do {
                udelay(1);
                ramp_timeout--;
                data = gk20a_readl(
                        g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
                if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
                        break;
        } while (ramp_timeout > 0);

        if ((gpll->mode == GPC_PLL_MODE_DVFS) && (ramp_timeout > 0)) {
                /* in DVFS mode complete SDM update */
                coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
                coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_m(),
                        trim_sys_gpcpll_cfg2_sdm_din_f(gpll->dvfs.sdm_din));
                gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff);
        }

        /* exit slowdown mode (always, even on ramp timeout) */
        data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
        data = set_field(data,
                        trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
                        trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
        data = set_field(data,
                        trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
                        trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
        gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
        gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());

        if (ramp_timeout <= 0) {
                gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
                return -ETIMEDOUT;
        }
        return 0;
}
609
610 /* GPCPLL bypass methods */
/* Change only the post divider while the output is parked on bypass */
static int clk_change_pldiv_under_bypass(struct gk20a *g, struct pll *gpll)
{
        u32 data, coeff;

        /* put PLL in bypass before programming it */
        data = gk20a_readl(g, trim_sys_sel_vco_r());
        data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
                trim_sys_sel_vco_gpc2clk_out_bypass_f());
        gk20a_writel(g, trim_sys_sel_vco_r(), data);

        /* change PLDIV */
        coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
        udelay(1);
        coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(),
                          trim_sys_gpcpll_coeff_pldiv_f(gpll->PL));
        gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

        /* put PLL back on vco */
        data = gk20a_readl(g, trim_sys_sel_vco_r());
        udelay(1);
        data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
                trim_sys_sel_vco_gpc2clk_out_vco_f());
        gk20a_writel(g, trim_sys_sel_vco_r(), data);

        return 0;
}
637
/*
 * Fully re-program and re-lock the PLL while the output is on bypass:
 * disable (or IDDQ-exit) the PLL, write new coefficients, re-enable,
 * wait for lock (or a fixed delay in DVFS mode where lock detect cannot
 * be used), then switch the output back to VCO with SYNC_MODE set.
 * Returns 0 on success; BUG()s (then -EBUSY) if the PLL never locks.
 */
static int clk_lock_gpc_pll_under_bypass(struct gk20a *g, struct pll *gpll)
{
        u32 data, cfg, coeff, timeout;

        /* put PLL in bypass before programming it */
        data = gk20a_readl(g, trim_sys_sel_vco_r());
        data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
                trim_sys_sel_vco_gpc2clk_out_bypass_f());
        gk20a_writel(g, trim_sys_sel_vco_r(), data);

        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        udelay(1);
        if (trim_sys_gpcpll_cfg_iddq_v(cfg)) {
                /* get out from IDDQ (1st power up) */
                cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(),
                                trim_sys_gpcpll_cfg_iddq_power_on_v());
                gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
                gk20a_readl(g, trim_sys_gpcpll_cfg_r());
                udelay(5);
        } else {
                /* clear SYNC_MODE before disabling PLL */
                cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
                                trim_sys_gpcpll_cfg_sync_mode_disable_f());
                gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
                gk20a_readl(g, trim_sys_gpcpll_cfg_r());

                /* disable running PLL before changing coefficients */
                cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
                                trim_sys_gpcpll_cfg_enable_no_f());
                gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
                gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        }

        /* change coefficients */
        if (gpll->mode == GPC_PLL_MODE_DVFS) {
                /* DVFS detection must be configured before the coefficients */
                clk_setup_dvfs_detection(g, gpll);

                coeff = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
                coeff = set_field(coeff, trim_sys_gpcpll_cfg2_sdm_din_m(),
                        trim_sys_gpcpll_cfg2_sdm_din_f(gpll->dvfs.sdm_din));
                gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), coeff);

                /* in DVFS mode the integer part n_int goes into NDIV */
                coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) |
                        trim_sys_gpcpll_coeff_ndiv_f(gpll->dvfs.n_int) |
                        trim_sys_gpcpll_coeff_pldiv_f(gpll->PL);
                gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
        } else {
                coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) |
                        trim_sys_gpcpll_coeff_ndiv_f(gpll->N) |
                        trim_sys_gpcpll_coeff_pldiv_f(gpll->PL);
                gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
        }

        /* enable PLL after changing coefficients */
        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
                        trim_sys_gpcpll_cfg_enable_yes_f());
        gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);

        /* just delay in DVFS mode (lock cannot be used) */
        if (gpll->mode == GPC_PLL_MODE_DVFS) {
                gk20a_readl(g, trim_sys_gpcpll_cfg_r());
                udelay(g->clk.na_pll_delay);
                goto pll_locked;
        }

        /* lock pll */
        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()){
                /* power on lock detect if it was off */
                cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(),
                        trim_sys_gpcpll_cfg_enb_lckdet_power_on_f());
                gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
                cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        }

        /* wait pll lock */
        timeout = g->clk.pll_delay + 1;
        do {
                udelay(1);
                cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
                if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f())
                        goto pll_locked;
        } while (--timeout > 0);

        /* PLL is messed up. What can we do here? */
        dump_gpc_pll(g, gpll, cfg);
        BUG();
        return -EBUSY;

pll_locked:
        gk20a_dbg_clk("locked config_pll under bypass r=0x%x v=0x%x",
                trim_sys_gpcpll_cfg_r(), cfg);

        /* set SYNC_MODE for glitchless switch out of bypass */
        cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
                        trim_sys_gpcpll_cfg_sync_mode_enable_f());
        gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
        gk20a_readl(g, trim_sys_gpcpll_cfg_r());

        /* put PLL back on vco */
        data = gk20a_readl(g, trim_sys_sel_vco_r());
        data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
                trim_sys_sel_vco_gpc2clk_out_vco_f());
        gk20a_writel(g, trim_sys_sel_vco_r(), data);

        return 0;
}
745
746 /* GPCPLL programming in legacy (non-DVFS) mode */
/*
 * GPCPLL programming in legacy (non-DVFS) mode.
 *
 * Reaches the target M/N/PL in gpll_new using the least disruptive path
 * available: a pure NDIV slide when M and PL are unchanged, a glitchless
 * PLDIV change when only PL differs (PLDIV_GLITCHLESS), or a full
 * re-lock under bypass otherwise. @allow_slide permits dynamic NDIV
 * sliding; when it is 0 the PLL is always re-locked under bypass.
 *
 * Returns 0 on success or a negative error code from the slide/lock
 * helpers.
 */
static int clk_program_gpc_pll(struct gk20a *g, struct pll *gpll_new,
			int allow_slide)
{
	u32 cfg, coeff, data;
	bool can_slide, pldiv_only;
	struct pll gpll;

	gk20a_dbg_fn("");

	/* No real PLL hardware on simulation/emulation platforms */
	if (!tegra_platform_is_silicon())
		return 0;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
	gpll.N = trim_sys_gpcpll_coeff_ndiv_v(coeff);
	gpll.PL = trim_sys_gpcpll_coeff_pldiv_v(coeff);
	gpll.clk_in = gpll_new->clk_in;

	/* combine target dvfs with old coefficients */
	gpll.dvfs = gpll_new->dvfs;
	gpll.mode = gpll_new->mode;

	/* do NDIV slide if there is no change in M and PL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	can_slide = allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg);

	if (can_slide && (gpll_new->M == gpll.M) && (gpll_new->PL == gpll.PL))
		return clk_slide_gpc_pll(g, gpll_new);

	/* slide down to NDIV_LO */
	if (can_slide) {
		int ret;
		/* NDIV_LO: lowest NDIV keeping VCO at/above min with old M */
		gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
				      gpll.clk_in);
		if (gpll.mode == GPC_PLL_MODE_DVFS)
			clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs);
		ret = clk_slide_gpc_pll(g, &gpll);
		if (ret)
			return ret;
	}
	/* after the slide only PL differs if M is unchanged */
	pldiv_only = can_slide && (gpll_new->M == gpll.M);

	/*
	 *  Split FO-to-bypass jump in halfs by setting out divider 1:2.
	 *  (needed even if PLDIV_GLITCHLESS is set, since 1:1 <=> 1:2 direct
	 *  transition is not really glitch-less - see get_interim_pldiv
	 *  function header).
	 */
	if ((gpll_new->PL < 2) || (gpll.PL < 2)) {
		data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
		data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
			trim_sys_gpc2clk_out_vcodiv_f(2));
		gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
		/* Intentional 2nd write to assure linear divider operation */
		gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
		/* readback to post the writes before the settle delay */
		gk20a_readl(g, trim_sys_gpc2clk_out_r());
		udelay(2);
	}

#if PLDIV_GLITCHLESS
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	if (pldiv_only) {
		/* Insert interim PLDIV state if necessary */
		u32 interim_pl = get_interim_pldiv(gpll_new->PL, gpll.PL);
		if (interim_pl) {
			coeff = set_field(coeff,
				trim_sys_gpcpll_coeff_pldiv_m(),
				trim_sys_gpcpll_coeff_pldiv_f(interim_pl));
			gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
			coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
		}
		goto set_pldiv; /* path A: no need to bypass */
	}

	/* path B: bypass if either M changes or PLL is disabled */
#endif
	/*
	 * Program and lock pll under bypass. On exit PLL is out of bypass,
	 * enabled, and locked. VCO is at vco_min if sliding is allowed.
	 * Otherwise it is at VCO target (and therefore last slide call below
	 * is effectively NOP). PL is set to target. Output divider is engaged
	 * at 1:2 if either entry, or exit PL setting is 1:1.
	 */
	gpll = *gpll_new;
	if (allow_slide) {
		/* lock at NDIV_LO for the new M; slide up to target below */
		gpll.N = DIV_ROUND_UP(gpll_new->M * gpc_pll_params.min_vco,
				      gpll_new->clk_in);
		if (gpll.mode == GPC_PLL_MODE_DVFS)
			clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs);
	}
	if (pldiv_only)
		clk_change_pldiv_under_bypass(g, &gpll);
	else
		clk_lock_gpc_pll_under_bypass(g, &gpll);

#if PLDIV_GLITCHLESS
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());

set_pldiv:
	/* coeff must be current from either path A or B */
	if (trim_sys_gpcpll_coeff_pldiv_v(coeff) != gpll_new->PL) {
		coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(),
			trim_sys_gpcpll_coeff_pldiv_f(gpll_new->PL));
		gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
	}
#endif
	/* restore out divider 1:1 */
	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	if ((data & trim_sys_gpc2clk_out_vcodiv_m()) !=
	    trim_sys_gpc2clk_out_vcodiv_by1_f()) {
		data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
				 trim_sys_gpc2clk_out_vcodiv_by1_f());
		udelay(2);
		gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
		/* Intentional 2nd write to assure linear divider operation */
		gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
		gk20a_readl(g, trim_sys_gpc2clk_out_r());
	}

	/* slide up to target NDIV */
	return clk_slide_gpc_pll(g, gpll_new);
}
870
871 /* GPCPLL programming in DVFS mode */
872 static int clk_program_na_gpc_pll(struct gk20a *g, struct pll *gpll_new,
873                                   int allow_slide)
874 {
875         clk_config_dvfs(g, gpll_new);
876
877         if (!gpll_new->enabled)
878                 return clk_program_gpc_pll(g, gpll_new, allow_slide);
879
880         /* always under bypass, for now */
881         return clk_program_gpc_pll(g, gpll_new, 0);
882 }
883
/*
 * Power down the GPCPLL: optionally slide NDIV down to VCO min first,
 * then switch output to bypass, clear SYNC_MODE, and disable the PLL.
 * Always returns 0.
 */
static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
{
	u32 cfg, coeff;
	struct clk_gk20a *clk = &g->clk;
	struct pll gpll = clk->gpc_pll;

	/* slide to VCO min */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
		coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
		gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
		/* NDIV_LO: lowest NDIV keeping VCO at/above min */
		gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
				      gpll.clk_in);
		if (gpll.mode == GPC_PLL_MODE_DVFS)
			clk_config_dvfs_ndiv(gpll.dvfs.mv, gpll.N, &gpll.dvfs);
		clk_slide_gpc_pll(g, &gpll);
	}

	/* put PLL in bypass before disabling it */
	cfg = gk20a_readl(g, trim_sys_sel_vco_r());
	cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
			trim_sys_sel_vco_gpc2clk_out_bypass_f());
	gk20a_writel(g, trim_sys_sel_vco_r(), cfg);

	/* clear SYNC_MODE before disabling PLL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
			trim_sys_gpcpll_cfg_sync_mode_disable_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);

	/* disable PLL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
			trim_sys_gpcpll_cfg_enable_no_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	/* readback posts the disable write */
	gk20a_readl(g, trim_sys_gpcpll_cfg_r());

	clk->gpc_pll.enabled = false;
	clk->gpc_pll_last.enabled = false;
	return 0;
}
925
/* No reset/enable h/w action is needed on GM20B; kept as an init hook stub. */
static int gm20b_init_clk_reset_enable_hw(struct gk20a *g)
{
	gk20a_dbg_fn("");
	return 0;
}
931
932 struct clk *gm20b_clk_get(struct gk20a *g)
933 {
934         if (!g->clk.tegra_clk) {
935                 struct clk *clk;
936
937                 clk = clk_get_sys("tegra_gk20a", "gpu");
938                 if (IS_ERR(clk)) {
939                         gk20a_err(dev_from_gk20a(g),
940                                 "fail to get tegra gpu clk tegra_gk20a/gpu");
941                         return NULL;
942                 }
943                 g->clk.tegra_clk = clk;
944         }
945
946         return g->clk.tegra_clk;
947 }
948
/*
 * One-time software-side clock setup: resolve the GPCPLL reference rate,
 * pick initial PLL coefficients, read calibration fuses, and select NA
 * (DVFS) mode when configured and supported. Idempotent via sw_ready.
 *
 * Returns 0 on success, -EINVAL if the Tegra clock or its reference
 * cannot be obtained.
 */
static int gm20b_init_clk_setup_sw(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	static int initialized;	/* coefficients chosen once across rail-gates */
	struct clk *ref;
	unsigned long ref_rate;
	bool calibrated;

	gk20a_dbg_fn("");

	if (clk->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	if (!gk20a_clk_get(g))
		return -EINVAL;

	/* reference = grandparent of the exported GPU clock */
	ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
	if (IS_ERR(ref)) {
		gk20a_err(dev_from_gk20a(g),
			"failed to get GPCPLL reference clock");
		return -EINVAL;
	}
	ref_rate = clk_get_rate(ref);

	/*
	 * Locking time in both legacy and DVFS mode is 40us. However, in legacy
	 * mode we rely on lock detection signal, and delay is just timeout
	 * limit, so we can afford set it longer. In DVFS mode each lock inserts
	 * specified delay, so it should be set as short as h/w allows.
	 */
	clk->pll_delay = 300; /* usec */
	clk->na_pll_delay = 40; /* usec*/

	clk->gpc_pll.id = GK20A_GPC_PLL;
	clk->gpc_pll.clk_in = ref_rate / KHZ;

	/* Initial frequency: 1/3 VCO min (low enough to be safe at Vmin) */
	if (!initialized) {
		initialized = 1;
		clk->gpc_pll.M = 1;
		clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
					clk->gpc_pll.clk_in);
		clk->gpc_pll.PL = 3;
		clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
		clk->gpc_pll.freq /= pl_to_div(clk->gpc_pll.PL);
	}

	/* zero return from param config means calibration data is valid */
	calibrated = !clk_config_calibration_params(g);
#ifdef CONFIG_TEGRA_USE_NA_GPCPLL
	if (ALLOW_NON_CALIBRATED_NA_MODE || calibrated) {
		/* NA mode is supported only at max update rate 38.4 MHz */
		if (clk->gpc_pll.clk_in == gpc_pll_params.max_u) {
			clk->gpc_pll.mode = GPC_PLL_MODE_DVFS;
			gpc_pll_params.min_u = gpc_pll_params.max_u;
		}
	}
#endif

	mutex_init(&clk->clk_mutex);

	clk->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;
}
1016
/*
 * Hardware-side clock setup: program the gpc2clk output dividers, clear
 * the global PLL bypass control, disable thermal idle slowdown, and
 * enable PLL DVFS machinery when in NA mode.
 */
static int gm20b_init_clk_setup_hw(struct gk20a *g)
{
	u32 data;

	gk20a_dbg_fn("");

	/* LDIV: Div4 mode (required); both  bypass and vco ratios 1:1 */
	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data,
			trim_sys_gpc2clk_out_sdiv14_m() |
			trim_sys_gpc2clk_out_vcodiv_m() |
			trim_sys_gpc2clk_out_bypdiv_m(),
			trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
			trim_sys_gpc2clk_out_vcodiv_by1_f() |
			trim_sys_gpc2clk_out_bypdiv_f(0));
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);

	/*
	 * Clear global bypass control; PLL is still under bypass, since SEL_VCO
	 * is cleared by default.
	 */
	data = gk20a_readl(g, trim_sys_bypassctrl_r());
	data = set_field(data, trim_sys_bypassctrl_gpcpll_m(),
			 trim_sys_bypassctrl_gpcpll_vco_f());
	gk20a_writel(g, trim_sys_bypassctrl_r(), data);

	/* Disable idle slow down */
	data = gk20a_readl(g, therm_clk_slowdown_r(0));
	data = set_field(data, therm_clk_slowdown_idle_factor_m(),
			 therm_clk_slowdown_idle_factor_disabled_f());
	gk20a_writel(g, therm_clk_slowdown_r(0), data);
	/* readback posts the slowdown write */
	gk20a_readl(g, therm_clk_slowdown_r(0));

	if (g->clk.gpc_pll.mode == GPC_PLL_MODE_DVFS)
		return clk_enbale_pll_dvfs(g); /* sic - helper name has a typo */

	return 0;
}
1055
1056 static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
1057 {
1058         struct clk_gk20a *clk = &g->clk;
1059
1060         if (freq > gpc_pll_params.max_freq)
1061                 freq = gpc_pll_params.max_freq;
1062         else if (freq < gpc_pll_params.min_freq)
1063                 freq = gpc_pll_params.min_freq;
1064
1065         if (freq != old_freq) {
1066                 /* gpc_pll.freq is changed to new value here */
1067                 if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
1068                                    &freq, true)) {
1069                         gk20a_err(dev_from_gk20a(g),
1070                                    "failed to set pll target for %d", freq);
1071                         return -EINVAL;
1072                 }
1073         }
1074         return 0;
1075 }
1076
/*
 * Program the GPCPLL to the current target in clk->gpc_pll, choosing the
 * DVFS or legacy path by PLL mode. If programming with dynamic sliding
 * fails, one retry is made under bypass. On success the last-known-good
 * settings are recorded in gpc_pll_last.
 *
 * Returns 0 on success, negative error code from the programming path.
 */
static int set_pll_freq(struct gk20a *g, int allow_slide)
{
	struct clk_gk20a *clk = &g->clk;
	int err = 0;

	gk20a_dbg_fn("last freq: %dMHz, target freq %dMHz",
		     clk->gpc_pll_last.freq, clk->gpc_pll.freq);

	/* If programming with dynamic sliding failed, re-try under bypass */
	if (clk->gpc_pll.mode == GPC_PLL_MODE_DVFS) {
		err = clk_program_na_gpc_pll(g, &clk->gpc_pll, allow_slide);
		if (err && allow_slide)
			err = clk_program_na_gpc_pll(g, &clk->gpc_pll, 0);
	} else {
		err = clk_program_gpc_pll(g, &clk->gpc_pll, allow_slide);
		if (err && allow_slide)
			err = clk_program_gpc_pll(g, &clk->gpc_pll, 0);
	}

	if (!err) {
		clk->gpc_pll.enabled = true;
		clk->gpc_pll_last = clk->gpc_pll;
		return 0;
	}

	/*
	 * Just report error but not restore PLL since dvfs could already change
	 * voltage even when programming failed.
	 */
	gk20a_err(dev_from_gk20a(g), "failed to set pll to %d",
		  clk->gpc_pll.freq);
	return err;
}
1110
1111 static int gm20b_clk_export_set_rate(void *data, unsigned long *rate)
1112 {
1113         u32 old_freq;
1114         int ret = -ENODATA;
1115         struct gk20a *g = data;
1116         struct clk_gk20a *clk = &g->clk;
1117
1118         if (rate) {
1119                 mutex_lock(&clk->clk_mutex);
1120                 old_freq = clk->gpc_pll.freq;
1121                 ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq);
1122                 if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on)
1123                         ret = set_pll_freq(g, 1);
1124                 if (!ret)
1125                         *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
1126                 mutex_unlock(&clk->clk_mutex);
1127         }
1128         return ret;
1129 }
1130
1131 static int gm20b_clk_export_enable(void *data)
1132 {
1133         int ret = 0;
1134         struct gk20a *g = data;
1135         struct clk_gk20a *clk = &g->clk;
1136
1137         mutex_lock(&clk->clk_mutex);
1138         if (!clk->gpc_pll.enabled && clk->clk_hw_on)
1139                 ret = set_pll_freq(g, 1);
1140         mutex_unlock(&clk->clk_mutex);
1141         return ret;
1142 }
1143
1144 static void gm20b_clk_export_disable(void *data)
1145 {
1146         struct gk20a *g = data;
1147         struct clk_gk20a *clk = &g->clk;
1148
1149         mutex_lock(&clk->clk_mutex);
1150         if (clk->gpc_pll.enabled && clk->clk_hw_on)
1151                 clk_disable_gpcpll(g, 1);
1152         mutex_unlock(&clk->clk_mutex);
1153 }
1154
1155 static void gm20b_clk_export_init(void *data, unsigned long *rate, bool *state)
1156 {
1157         struct gk20a *g = data;
1158         struct clk_gk20a *clk = &g->clk;
1159
1160         mutex_lock(&clk->clk_mutex);
1161         if (state)
1162                 *state = clk->gpc_pll.enabled;
1163         if (rate)
1164                 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
1165         mutex_unlock(&clk->clk_mutex);
1166 }
1167
/* Ops exported to the Tegra clock framework; .data is set at registration */
static struct tegra_clk_export_ops gm20b_clk_export_ops = {
	.init = gm20b_clk_export_init,
	.enable = gm20b_clk_export_enable,
	.disable = gm20b_clk_export_disable,
	.set_rate = gm20b_clk_export_set_rate,
};
1174
1175 static int gm20b_clk_register_export_ops(struct gk20a *g)
1176 {
1177         int ret;
1178         struct clk *c;
1179
1180         if (gm20b_clk_export_ops.data)
1181                 return 0;
1182
1183         gm20b_clk_export_ops.data = (void *)g;
1184         c = g->clk.tegra_clk;
1185         if (!c || !clk_get_parent(c))
1186                 return -ENOSYS;
1187
1188         ret = tegra_clk_register_export_ops(clk_get_parent(c),
1189                                             &gm20b_clk_export_ops);
1190
1191         return ret;
1192 }
1193
1194 static int gm20b_init_clk_support(struct gk20a *g)
1195 {
1196         struct clk_gk20a *clk = &g->clk;
1197         u32 err;
1198
1199         gk20a_dbg_fn("");
1200
1201         clk->g = g;
1202
1203         err = gm20b_init_clk_reset_enable_hw(g);
1204         if (err)
1205                 return err;
1206
1207         err = gm20b_init_clk_setup_sw(g);
1208         if (err)
1209                 return err;
1210
1211         mutex_lock(&clk->clk_mutex);
1212         clk->clk_hw_on = true;
1213
1214         err = gm20b_init_clk_setup_hw(g);
1215         mutex_unlock(&clk->clk_mutex);
1216         if (err)
1217                 return err;
1218
1219         err = gm20b_clk_register_export_ops(g);
1220         if (err)
1221                 return err;
1222
1223         /* FIXME: this effectively prevents host level clock gating */
1224         err = clk_enable(g->clk.tegra_clk);
1225         if (err)
1226                 return err;
1227
1228         /* The prev call may not enable PLL if gbus is unbalanced - force it */
1229         mutex_lock(&clk->clk_mutex);
1230         if (!clk->gpc_pll.enabled)
1231                 err = set_pll_freq(g, 1);
1232         mutex_unlock(&clk->clk_mutex);
1233         if (err)
1234                 return err;
1235
1236 #ifdef CONFIG_DEBUG_FS
1237         if (!clk->debugfs_set) {
1238                 if (!clk_gm20b_debugfs_init(g))
1239                         clk->debugfs_set = true;
1240         }
1241 #endif
1242         return err;
1243 }
1244
1245 static int gm20b_suspend_clk_support(struct gk20a *g)
1246 {
1247         int ret = 0;
1248
1249         clk_disable(g->clk.tegra_clk);
1250
1251         /* The prev call may not disable PLL if gbus is unbalanced - force it */
1252         mutex_lock(&g->clk.clk_mutex);
1253         if (g->clk.gpc_pll.enabled)
1254                 ret = clk_disable_gpcpll(g, 1);
1255         g->clk.clk_hw_on = false;
1256         mutex_unlock(&g->clk.clk_mutex);
1257         return ret;
1258 }
1259
/* Install GM20B clock entry points into the per-chip ops table. */
void gm20b_init_clk_ops(struct gpu_ops *gops)
{
	gops->clk.init_clk_support = gm20b_init_clk_support;
	gops->clk.suspend_clk_support = gm20b_suspend_clk_support;
}
1265
1266 #ifdef CONFIG_DEBUG_FS
1267
/* debugfs "rate": read returns current GPU rate, write sets a new one */
static int rate_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	*val = (u64)gk20a_clk_get_rate(g);
	return 0;
}
static int rate_set(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *)data;
	/* value is truncated to 32 bits before being applied */
	return gk20a_clk_set_rate(g, (u32)val);
}
DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
1280
/*
 * debugfs "pll_reg" show: decode and print the bypass, sel_vco, cfg and
 * coefficient registers plus the frequency derived from M/N/PL.
 * Skips register access when the GPU is powered down.
 */
static int pll_reg_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 reg, m, n, pl, f;

	mutex_lock(&g->clk.clk_mutex);
	if (!g->clk.clk_hw_on) {
		seq_printf(s, "gk20a powered down - no access to registers\n");
		mutex_unlock(&g->clk.clk_mutex);
		return 0;
	}

	reg = gk20a_readl(g, trim_sys_bypassctrl_r());
	seq_printf(s, "bypassctrl = %s, ", reg ? "bypass" : "vco");
	reg = gk20a_readl(g, trim_sys_sel_vco_r());
	seq_printf(s, "sel_vco = %s, ", reg ? "vco" : "bypass");

	reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	seq_printf(s, "cfg  = 0x%x : %s : %s : %s\n", reg,
		trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled",
		trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked",
		trim_sys_gpcpll_cfg_sync_mode_v(reg) ? "sync_on" : "sync_off");

	reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	m = trim_sys_gpcpll_coeff_mdiv_v(reg);
	n = trim_sys_gpcpll_coeff_ndiv_v(reg);
	pl = trim_sys_gpcpll_coeff_pldiv_v(reg);
	/* pll output = clk_in * N / (M * divider(PL)); gpu clk is half */
	f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div(pl));
	seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
	seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
	mutex_unlock(&g->clk.clk_mutex);
	return 0;
}
1314
/* seq_file open hook for the read-only "pll_reg" debugfs node */
static int pll_reg_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_reg_show, inode->i_private);
}

static const struct file_operations pll_reg_fops = {
	.open		= pll_reg_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
1326
/*
 * debugfs "pll_reg_raw" show: hex dump of the GPCPLL register range
 * (cfg through dvfs2) and the GPC clock output registers.
 * Skips register access when the GPU is powered down.
 */
static int pll_reg_raw_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 reg;

	mutex_lock(&g->clk.clk_mutex);
	if (!g->clk.clk_hw_on) {
		seq_puts(s, "gk20a powered down - no access to registers\n");
		mutex_unlock(&g->clk.clk_mutex);
		return 0;
	}

	seq_puts(s, "GPCPLL REGISTERS:\n");
	for (reg = trim_sys_gpcpll_cfg_r(); reg <= trim_sys_gpcpll_dvfs2_r();
	      reg += sizeof(u32))
		seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));

	seq_puts(s, "\nGPC CLK OUT REGISTERS:\n");

	reg = trim_sys_sel_vco_r();
	seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
	reg = trim_sys_gpc2clk_out_r();
	seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
	reg = trim_sys_bypassctrl_r();
	seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));

	mutex_unlock(&g->clk.clk_mutex);
	return 0;
}
1356
/* seq_file open hook for the "pll_reg_raw" debugfs node */
static int pll_reg_raw_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_reg_raw_show, inode->i_private);
}
1361
1362 static ssize_t pll_reg_raw_write(struct file *file,
1363         const char __user *userbuf, size_t count, loff_t *ppos)
1364 {
1365         struct gk20a *g = file->f_path.dentry->d_inode->i_private;
1366         char buf[80];
1367         u32 reg, val;
1368
1369         if (sizeof(buf) <= count)
1370                 return -EINVAL;
1371
1372         if (copy_from_user(buf, userbuf, count))
1373                 return -EFAULT;
1374
1375         /* terminate buffer and trim - white spaces may be appended
1376          *  at the end when invoked from shell command line */
1377         buf[count] = '\0';
1378         strim(buf);
1379
1380         if (sscanf(buf, "[0x%x] = 0x%x", &reg, &val) != 2)
1381                 return -EINVAL;
1382
1383         if (((reg < trim_sys_gpcpll_cfg_r()) ||
1384             (reg > trim_sys_gpcpll_dvfs2_r())) &&
1385             (reg != trim_sys_sel_vco_r()) &&
1386             (reg != trim_sys_gpc2clk_out_r()) &&
1387             (reg != trim_sys_bypassctrl_r()))
1388                 return -EPERM;
1389
1390         mutex_lock(&g->clk.clk_mutex);
1391         if (!g->clk.clk_hw_on) {
1392                 mutex_unlock(&g->clk.clk_mutex);
1393                 return -EBUSY;
1394         }
1395         gk20a_writel(g, reg, val);
1396         mutex_unlock(&g->clk.clk_mutex);
1397         return count;
1398 }
1399
/* file_operations for the read/write "pll_reg_raw" debugfs node */
static const struct file_operations pll_reg_raw_fops = {
	.open		= pll_reg_raw_open,
	.read		= seq_read,
	.write		= pll_reg_raw_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
1407
/*
 * debugfs "monitor" get: measure the actual GPCCLK frequency with the
 * h/w clock counter (GPCCLK counted over ncycle reference cycles),
 * with thermal slowdown temporarily disabled so the reading is stable.
 *
 * Returns 0 with *val = frequency, -EBUSY if the counter was still
 * running between the two reads, or the gk20a_busy() error.
 */
static int monitor_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	struct clk_gk20a *clk = &g->clk;
	u32 clk_slowdown, clk_slowdown_save;
	int err;

	u32 ncycle = 800; /* count GPCCLK for ncycle of clkin */
	u64 freq = clk->gpc_pll.clk_in;
	u32 count1, count2;

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	mutex_lock(&g->clk.clk_mutex);

	/* Disable clock slowdown during measurements */
	clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0));
	clk_slowdown = set_field(clk_slowdown_save,
				 therm_clk_slowdown_idle_factor_m(),
				 therm_clk_slowdown_idle_factor_disabled_f());
	gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown);
	gk20a_readl(g, therm_clk_slowdown_r(0));

	/* reset, then arm the counter for ncycle reference clocks */
	gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
		     trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
	gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
		     trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
		     trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
		     trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
	/* start */

	/* It should take less than 25us to finish 800 cycle of 38.4MHz.
	   But longer than 100us delay is required here. */
	gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
	udelay(200);

	/* two reads; equal values mean the count had settled */
	count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
	udelay(100);
	count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
	freq *= trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2);
	do_div(freq, ncycle);
	*val = freq;

	/* Restore clock slowdown */
	gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save);
	mutex_unlock(&g->clk.clk_mutex);

	gk20a_idle(g->dev);

	if (count1 != count2)
		return -EBUSY;
	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
1464
/*
 * Create the clock debugfs nodes (rate, pll_reg, pll_reg_raw, monitor)
 * under the platform debugfs directory. On any failure the whole
 * directory is removed and -ENOMEM is returned.
 */
static int clk_gm20b_debugfs_init(struct gk20a *g)
{
	struct dentry *d;
	struct gk20a_platform *platform = platform_get_drvdata(g->dev);

	d = debugfs_create_file(
		"rate", S_IRUGO|S_IWUSR, platform->debugfs, g, &rate_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"pll_reg", S_IRUGO, platform->debugfs, g, &pll_reg_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file("pll_reg_raw",
		S_IRUGO, platform->debugfs, g, &pll_reg_raw_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"monitor", S_IRUGO, platform->debugfs, g, &monitor_fops);
	if (!d)
		goto err_out;

	return 0;

err_out:
	pr_err("%s: Failed to make debugfs node\n", __func__);
	debugfs_remove_recursive(platform->debugfs);
	return -ENOMEM;
}
1497
1498 #endif /* CONFIG_DEBUG_FS */