gpu: nvgpu: Bypass for GM20B post-divider change
[linux-3.10.git] / drivers / gpu / nvgpu / gm20b / clk_gm20b.c
1 /*
2  * GM20B Clocks
3  *
4  * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/clk.h>
20 #include <linux/delay.h>        /* for mdelay */
21 #include <linux/module.h>
22 #include <linux/debugfs.h>
23 #include <linux/uaccess.h>
24 #include <linux/clk/tegra.h>
25
26 #include "gk20a/gk20a.h"
27 #include "hw_trim_gm20b.h"
28 #include "hw_timer_gm20b.h"
29 #include "hw_therm_gm20b.h"
30 #include "clk_gm20b.h"
31
/* Clock-domain debug print: routes through gk20a_dbg() with gpu_dbg_clk. */
#define gk20a_dbg_clk(fmt, arg...) \
        gk20a_dbg(gpu_dbg_clk, fmt, ##arg)
34
35 /* from vbios PLL info table */
36 static struct pll_parms gpc_pll_params = {
37         128000,  2600000,       /* freq */
38         1300000, 2600000,       /* vco */
39         12000,   38400,         /* u */
40         1, 255,                 /* M */
41         8, 255,                 /* N */
42         1, 31,                  /* PL */
43 };
44
45 #ifdef CONFIG_DEBUG_FS
46 static int clk_gm20b_debugfs_init(struct gk20a *g);
47 #endif
48
/*
 * Read and print one TRIM_SYS register. Expects locals 'addr' and 'data'
 * plus 'struct gk20a *g' to be in scope at the expansion site.
 */
#define DUMP_REG(addr_func) \
do {                                                                    \
        addr = trim_sys_##addr_func##_r();                              \
        data = gk20a_readl(g, addr);                                    \
        pr_info(#addr_func "[0x%x] = 0x%x\n", addr, data);              \
} while (0)
55
56 static void dump_gpc_pll(struct gk20a *g, struct pll *gpll, u32 last_cfg)
57 {
58         u32 addr, data;
59
60         pr_info("**** GPCPLL DUMP ****");
61         pr_info("gpcpll s/w M=%u N=%u P=%u\n", gpll->M, gpll->N, gpll->PL);
62         pr_info("gpcpll_cfg_last = 0x%x\n", last_cfg);
63         DUMP_REG(gpcpll_cfg);
64         DUMP_REG(gpcpll_coeff);
65         DUMP_REG(sel_vco);
66         pr_info("\n");
67 }
68
/* 1:1 match between post divider settings and divisor value */
/* Convert a PL (post-divider) field value to the actual divisor. */
static inline u32 pl_to_div(u32 pl)
{
        return pl;
}

/* Inverse of pl_to_div(): divisor back to the PL field encoding. */
static inline u32 div_to_pl(u32 div)
{
        return div;
}
79
80 /* FIXME: remove after on-silicon testing */
81 #define PLDIV_GLITCHLESS 0
82
/* Calculate and update M/N/PL as well as pll->freq
    ref_clk_f = clk_in_f;
    u_f = ref_clk_f / M;
    vco_f = u_f * N = ref_clk_f * N / M;
    PLL output = gpc2clk = target clock frequency = vco_f / pl_to_pdiv(PL);
    gpcclk = gpc2clk / 2;

   Exhaustive search over PL, M, N within pll_params limits for the
   coefficient triple with minimum output error vs *target_freq (kHz).
   With best_fit the full space is searched; otherwise the search stops
   early at any solution within ~0.45%. On return pll->M/N/PL/freq and
   *target_freq hold the achieved configuration. Always returns 0;
   BUG()s if no valid coefficients exist at all. */
static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll,
        struct pll_parms *pll_params, u32 *target_freq, bool best_fit)
{
        u32 min_vco_f, max_vco_f;
        u32 best_M, best_N;
        u32 low_PL, high_PL, best_PL;
        u32 m, n, n2;
        u32 target_vco_f, vco_f;
        u32 ref_clk_f, target_clk_f, u_f;
        u32 delta, lwv, best_delta = ~0;
        u32 pl;

        BUG_ON(target_freq == NULL);

        gk20a_dbg_fn("request target freq %d MHz", *target_freq);

        ref_clk_f = pll->clk_in;
        target_clk_f = *target_freq;
        max_vco_f = pll_params->max_vco;
        min_vco_f = pll_params->min_vco;
        best_M = pll_params->max_M;
        best_N = pll_params->min_N;
        best_PL = pll_params->min_PL;

        /* Allow VCO up to 2% above nominal max so high targets stay reachable */
        target_vco_f = target_clk_f + target_clk_f / 50;
        if (max_vco_f < target_vco_f)
                max_vco_f = target_vco_f;

        /* Set PL search boundaries. */
        high_PL = div_to_pl((max_vco_f + target_vco_f - 1) / target_vco_f);
        high_PL = min(high_PL, pll_params->max_PL);
        high_PL = max(high_PL, pll_params->min_PL);

        low_PL = div_to_pl(min_vco_f / target_vco_f);
        low_PL = min(low_PL, pll_params->max_PL);
        low_PL = max(low_PL, pll_params->min_PL);

        gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)",
                        low_PL, pl_to_div(low_PL), high_PL, pl_to_div(high_PL));

        for (pl = low_PL; pl <= high_PL; pl++) {
                /* VCO frequency needed to hit the target at this post divider */
                target_vco_f = target_clk_f * pl_to_div(pl);

                for (m = pll_params->min_M; m <= pll_params->max_M; m++) {
                        u_f = ref_clk_f / m;

                        /* update rate shrinks as M grows: done with this PL */
                        if (u_f < pll_params->min_u)
                                break;
                        if (u_f > pll_params->max_u)
                                continue;

                        /* candidate N range: floor and ceiling of ideal N */
                        n = (target_vco_f * m) / ref_clk_f;
                        n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f;

                        if (n > pll_params->max_N)
                                break;

                        for (; n <= n2; n++) {
                                if (n < pll_params->min_N)
                                        continue;
                                if (n > pll_params->max_N)
                                        break;

                                vco_f = ref_clk_f * n / m;

                                if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
                                        /* rounded output frequency */
                                        lwv = (vco_f + (pl_to_div(pl) / 2))
                                                / pl_to_div(pl);
                                        delta = abs(lwv - target_clk_f);

                                        if (delta < best_delta) {
                                                best_delta = delta;
                                                best_M = m;
                                                best_N = n;
                                                best_PL = pl;

                                                if (best_delta == 0 ||
                                                    /* 0.45% for non best fit */
                                                    (!best_fit && (vco_f / best_delta > 218))) {
                                                        goto found_match;
                                                }

                                                gk20a_dbg_info("delta %d @ M %d, N %d, PL %d",
                                                        delta, m, n, pl);
                                        }
                                }
                        }
                }
        }

found_match:
        /* best_delta untouched means no valid (M, N, PL) existed at all */
        BUG_ON(best_delta == ~0);

        if (best_fit && best_delta != 0)
                gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll",
                        target_clk_f);

        pll->M = best_M;
        pll->N = best_N;
        pll->PL = best_PL;

        /* save current frequency */
        pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div(pll->PL));

        *target_freq = pll->freq;

        gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)",
                *target_freq, pll->M, pll->N, pll->PL, pl_to_div(pll->PL));

        gk20a_dbg_fn("done");

        return 0;
}
202
203 static void clk_setup_slide(struct gk20a *g, u32 clk_u)
204 {
205         u32 data, step_a, step_b;
206
207         switch (clk_u) {
208         case 12000:
209         case 12800:
210         case 13000:                     /* only on FPGA */
211                 step_a = 0x2B;
212                 step_b = 0x0B;
213                 break;
214         case 19200:
215                 step_a = 0x12;
216                 step_b = 0x08;
217                 break;
218         case 38400:
219                 step_a = 0x04;
220                 step_b = 0x05;
221                 break;
222         default:
223                 gk20a_err(dev_from_gk20a(g), "Unexpected reference rate %u kHz",
224                           clk_u);
225                 BUG();
226         }
227
228         /* setup */
229         data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
230         data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
231                         trim_sys_gpcpll_cfg2_pll_stepa_f(step_a));
232         gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
233         data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
234         data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
235                         trim_sys_gpcpll_cfg3_pll_stepb_f(step_b));
236         gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);
237 }
238
/*
 * Dynamically ramp the GPCPLL feedback divider (NDIV) to gpll->N while the
 * PLL keeps running: program ramp steps, enter slowdown mode, latch the new
 * NDIV, trigger the ramp, poll the broadcast debug register for completion,
 * then exit slowdown mode. M and PL are not touched here.
 *
 * Returns 0 on success (or if NDIV is already at target), -ETIMEDOUT if
 * ramp-done never asserted within the ~500 us poll budget. Slowdown mode
 * is exited in either case.
 */
static int clk_slide_gpc_pll(struct gk20a *g, struct pll *gpll)
{
        u32 data, coeff;
        u32 nold;
        int ramp_timeout = 500;

        /* get old coefficients */
        coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
        nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);

        /* do nothing if NDIV is same */
        if (gpll->N == nold)
                return 0;

        /* dynamic ramp setup based on update rate (= clk_in / M, in kHz) */
        clk_setup_slide(g, gpll->clk_in / gpll->M);

        /* pll slowdown mode */
        data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
        data = set_field(data,
                        trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
                        trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
        gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

        /* new ndiv ready for ramp */
        coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
        coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
                        trim_sys_gpcpll_coeff_ndiv_f(gpll->N));
        udelay(1);
        gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

        /* dynamic ramp to new ndiv */
        data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
        data = set_field(data,
                        trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
                        trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
        udelay(1);
        gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

        /* poll for ramp completion: 1 us per iteration, up to 500 us */
        do {
                udelay(1);
                ramp_timeout--;
                data = gk20a_readl(
                        g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
                if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
                        break;
        } while (ramp_timeout > 0);

        /* exit slowdown mode */
        data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
        data = set_field(data,
                        trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
                        trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
        data = set_field(data,
                        trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
                        trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
        gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
        /* read-back; presumably flushes the posted write before reporting */
        gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());

        if (ramp_timeout <= 0) {
                gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
                return -ETIMEDOUT;
        }
        return 0;
}
304
/*
 * Fully (re)program M/N/PL and lock GPCPLL while gpc2clk runs from the
 * bypass path. Sequence: select bypass; on first power-up exit IDDQ,
 * otherwise clear SYNC_MODE and disable the running PLL; write the new
 * coefficients; enable; power up the lock detector if it was off; poll for
 * lock; then enable SYNC_MODE and switch gpc2clk back onto the VCO.
 *
 * Returns 0 on success. If lock is not achieved within ~pll_delay us the
 * registers are dumped and BUG() is raised (-EBUSY is nominally returned).
 */
static int clk_lock_gpc_pll_under_bypass(struct gk20a *g, struct pll *gpll)
{
        u32 data, cfg, coeff, timeout;

        /* put PLL in bypass before programming it */
        data = gk20a_readl(g, trim_sys_sel_vco_r());
        data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
                trim_sys_sel_vco_gpc2clk_out_bypass_f());
        gk20a_writel(g, trim_sys_sel_vco_r(), data);

        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        if (trim_sys_gpcpll_cfg_iddq_v(cfg)) {
                /* get out from IDDQ (1st power up) */
                cfg = set_field(cfg, trim_sys_gpcpll_cfg_iddq_m(),
                                trim_sys_gpcpll_cfg_iddq_power_on_v());
                gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
                gk20a_readl(g, trim_sys_gpcpll_cfg_r());
                udelay(5);
        } else {
                /* clear SYNC_MODE before disabling PLL */
                cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
                                trim_sys_gpcpll_cfg_sync_mode_disable_f());
                gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
                gk20a_readl(g, trim_sys_gpcpll_cfg_r());

                /* disable running PLL before changing coefficients */
                cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
                                trim_sys_gpcpll_cfg_enable_no_f());
                gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
                gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        }

        /* change coefficients */
        coeff = trim_sys_gpcpll_coeff_mdiv_f(gpll->M) |
                trim_sys_gpcpll_coeff_ndiv_f(gpll->N) |
                trim_sys_gpcpll_coeff_pldiv_f(gpll->PL);
        gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

        /* enable PLL after changing coefficients */
        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
                        trim_sys_gpcpll_cfg_enable_yes_f());
        gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);

        /* lock pll: power up the lock detector if it is off */
        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        if (cfg & trim_sys_gpcpll_cfg_enb_lckdet_power_off_f()){
                cfg = set_field(cfg, trim_sys_gpcpll_cfg_enb_lckdet_m(),
                        trim_sys_gpcpll_cfg_enb_lckdet_power_on_f());
                gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
        }

        /* wait pll lock: poll every 2 us for up to ~pll_delay us */
        timeout = g->clk.pll_delay / 2 + 1;
        do {
                cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
                if (cfg & trim_sys_gpcpll_cfg_pll_lock_true_f())
                        goto pll_locked;
                udelay(2);
        } while (--timeout > 0);

        /* PLL is messed up. What can we do here? */
        dump_gpc_pll(g, gpll, cfg);
        BUG();
        return -EBUSY;

pll_locked:
        gk20a_dbg_clk("locked config_pll under bypass r=0x%x v=0x%x",
                trim_sys_gpcpll_cfg_r(), cfg);

        /* set SYNC_MODE for glitchless switch out of bypass */
        cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
                        trim_sys_gpcpll_cfg_sync_mode_enable_f());
        gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
        gk20a_readl(g, trim_sys_gpcpll_cfg_r());

        /* put PLL back on vco */
        data = gk20a_readl(g, trim_sys_sel_vco_r());
        data = set_field(data, trim_sys_sel_vco_gpc2clk_out_m(),
                trim_sys_sel_vco_gpc2clk_out_vco_f());
        gk20a_writel(g, trim_sys_sel_vco_r(), data);

        return 0;
}
389
/*
 * Program GPCPLL to gpll_new. If only NDIV changes (same M and PL, PLL
 * enabled, sliding allowed), ramp NDIV dynamically. Otherwise slide down
 * to NDIV_LO (when possible), re-lock the PLL under bypass, and finally
 * slide up to the target NDIV. The PLDIV_GLITCHLESS compile-time switch
 * selects between glitchless post-divider handling and the out-divider
 * 1:2 split of the bypass jump.
 *
 * Returns 0 on success (and trivially on non-silicon platforms), or a
 * negative error from the NDIV slide.
 */
static int clk_program_gpc_pll(struct gk20a *g, struct pll *gpll_new,
                        int allow_slide)
{
#if PLDIV_GLITCHLESS
        bool skip_bypass;
#else
        u32 data;
#endif
        u32 cfg, coeff;
        bool can_slide;
        struct pll gpll;

        gk20a_dbg_fn("");

        if (!tegra_platform_is_silicon())
                return 0;

        /* get old coefficients */
        coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
        gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
        gpll.N = trim_sys_gpcpll_coeff_ndiv_v(coeff);
        gpll.PL = trim_sys_gpcpll_coeff_pldiv_v(coeff);
        gpll.clk_in = gpll_new->clk_in;

        /* do NDIV slide if there is no change in M and PL */
        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        can_slide = allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg);

        if (can_slide && (gpll_new->M == gpll.M) && (gpll_new->PL == gpll.PL))
                return clk_slide_gpc_pll(g, gpll_new);

        /* slide down to NDIV_LO (minimum VCO at the current M) */
        if (can_slide) {
                int ret;
                gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
                                      gpll.clk_in);
                ret = clk_slide_gpc_pll(g, &gpll);
                if (ret)
                        return ret;
        }

#if PLDIV_GLITCHLESS
        /*
         * Limit either FO-to-FO (path A below) or FO-to-bypass (path B below)
         * jump to min_vco/2 by setting post divider >= 1:2.
         */
        skip_bypass = can_slide && (gpll_new->M == gpll.M);
        coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
        if ((skip_bypass && (gpll_new->PL < 2)) || (gpll.PL < 2)) {
                if (gpll.PL != 2) {
                        coeff = set_field(coeff,
                                trim_sys_gpcpll_coeff_pldiv_m(),
                                trim_sys_gpcpll_coeff_pldiv_f(2));
                        gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
                        coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
                        udelay(2);
                }
        }

        if (skip_bypass)
                goto set_pldiv; /* path A: no need to bypass */

        /* path B: bypass if either M changes or PLL is disabled */
#else
        /* split FO-to-bypass jump in halves by setting out divider 1:2 */
        data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
        data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
                trim_sys_gpc2clk_out_vcodiv_f(2));
        gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
        gk20a_readl(g, trim_sys_gpc2clk_out_r());
        udelay(2);
#endif
        /*
         * Program and lock pll under bypass. On exit PLL is out of bypass,
         * enabled, and locked. VCO is at vco_min if sliding is allowed.
         * Otherwise it is at VCO target (and therefore last slide call below
         * is effectively NOP). PL is preserved (not set to target) if post
         * divider is glitchless. Otherwise it is at PL target.
         */
        gpll = *gpll_new;
        if (allow_slide)
                gpll.N = DIV_ROUND_UP(gpll_new->M * gpc_pll_params.min_vco,
                                      gpll_new->clk_in);
#if PLDIV_GLITCHLESS
        gpll.PL = (gpll_new->PL < 2) ? 2 : gpll_new->PL;
#endif
        clk_lock_gpc_pll_under_bypass(g, &gpll);

#if PLDIV_GLITCHLESS
        coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
        udelay(2);

set_pldiv:
        /* coeff must be current from either path A or B */
        if (trim_sys_gpcpll_coeff_pldiv_v(coeff) != gpll_new->PL) {
                coeff = set_field(coeff, trim_sys_gpcpll_coeff_pldiv_m(),
                        trim_sys_gpcpll_coeff_pldiv_f(gpll_new->PL));
                gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);
        }
#else
        /* restore out divider 1:1 */
        data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
        data = set_field(data, trim_sys_gpc2clk_out_vcodiv_m(),
                trim_sys_gpc2clk_out_vcodiv_by1_f());
        udelay(2);
        gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);
#endif
        /* slide up to target NDIV */
        return clk_slide_gpc_pll(g, gpll_new);
}
500
/*
 * Disable GPCPLL. If allowed (and the PLL is enabled), first slide down to
 * NDIV_LO to limit the frequency jump, then switch gpc2clk onto bypass,
 * clear SYNC_MODE, and power the PLL down. Called with clk_mutex held
 * (see callers). Always returns 0.
 */
static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
{
        u32 cfg, coeff;
        struct clk_gk20a *clk = &g->clk;
        struct pll gpll = clk->gpc_pll;

        /* slide to VCO min */
        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
                coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
                gpll.M = trim_sys_gpcpll_coeff_mdiv_v(coeff);
                gpll.N = DIV_ROUND_UP(gpll.M * gpc_pll_params.min_vco,
                                      gpll.clk_in);
                clk_slide_gpc_pll(g, &gpll);
        }

        /* put PLL in bypass before disabling it */
        cfg = gk20a_readl(g, trim_sys_sel_vco_r());
        cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
                        trim_sys_sel_vco_gpc2clk_out_bypass_f());
        gk20a_writel(g, trim_sys_sel_vco_r(), cfg);

        /* clear SYNC_MODE before disabling PLL */
        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        cfg = set_field(cfg, trim_sys_gpcpll_cfg_sync_mode_m(),
                        trim_sys_gpcpll_cfg_sync_mode_disable_f());
        gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);

        /* disable PLL */
        cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
        cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
                        trim_sys_gpcpll_cfg_enable_no_f());
        gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
        gk20a_readl(g, trim_sys_gpcpll_cfg_r());

        clk->gpc_pll.enabled = false;
        return 0;
}
539
/* HW reset/enable hook; nothing to do on GM20B (stub, always returns 0). */
static int gm20b_init_clk_reset_enable_hw(struct gk20a *g)
{
        gk20a_dbg_fn("");
        return 0;
}
545
546 struct clk *gm20b_clk_get(struct gk20a *g)
547 {
548         if (!g->clk.tegra_clk) {
549                 struct clk *clk;
550
551                 clk = clk_get_sys("tegra_gk20a", "gpu");
552                 if (IS_ERR(clk)) {
553                         gk20a_err(dev_from_gk20a(g),
554                                 "fail to get tegra gpu clk tegra_gk20a/gpu");
555                         return NULL;
556                 }
557                 g->clk.tegra_clk = clk;
558         }
559
560         return g->clk.tegra_clk;
561 }
562
/*
 * One-time s/w clock initialization: resolve the GPCPLL reference rate,
 * set the lock-poll budget, and seed initial M/N/PL for a safe start
 * frequency. Re-entry with sw_ready set is a no-op. The function-local
 * 'initialized' static ensures the seed values are applied only once per
 * boot — NOTE(review): presumably so a re-init after railgating keeps the
 * last programmed settings; confirm against the power-management path.
 */
static int gm20b_init_clk_setup_sw(struct gk20a *g)
{
        struct clk_gk20a *clk = &g->clk;
        static int initialized;
        struct clk *ref;
        unsigned long ref_rate;

        gk20a_dbg_fn("");

        if (clk->sw_ready) {
                gk20a_dbg_fn("skip init");
                return 0;
        }

        if (!gk20a_clk_get(g))
                return -EINVAL;

        /* reference = grandparent of the tegra GPU clock */
        ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
        if (IS_ERR(ref)) {
                gk20a_err(dev_from_gk20a(g),
                        "failed to get GPCPLL reference clock");
                return -EINVAL;
        }
        ref_rate = clk_get_rate(ref);

        clk->pll_delay = 300; /* usec */

        clk->gpc_pll.id = GK20A_GPC_PLL;
        clk->gpc_pll.clk_in = ref_rate / KHZ;   /* stored in kHz */

        /* Initial frequency: 1/3 VCO min (low enough to be safe at Vmin) */
        if (!initialized) {
                initialized = 1;
                clk->gpc_pll.M = 1;
                clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
                                        clk->gpc_pll.clk_in);
                clk->gpc_pll.PL = 3;
                clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
                clk->gpc_pll.freq /= pl_to_div(clk->gpc_pll.PL);
        }

        mutex_init(&clk->clk_mutex);

        clk->sw_ready = true;

        gk20a_dbg_fn("done");
        return 0;
}
611
/*
 * H/W setup: configure the gpc2clk output divider (Div4 mode, bypass and
 * VCO ratios 1:1), clear the global GPCPLL bypass control, and disable
 * thermal idle slowdown. Called with clk_mutex held and clk_hw_on set
 * (see gm20b_init_clk_support). Always returns 0.
 */
static int gm20b_init_clk_setup_hw(struct gk20a *g)
{
        u32 data;

        gk20a_dbg_fn("");

        /* LDIV: Div4 mode (required); both bypass and vco ratios 1:1 */
        data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
        data = set_field(data,
                        trim_sys_gpc2clk_out_sdiv14_m() |
                        trim_sys_gpc2clk_out_vcodiv_m() |
                        trim_sys_gpc2clk_out_bypdiv_m(),
                        trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
                        trim_sys_gpc2clk_out_vcodiv_by1_f() |
                        trim_sys_gpc2clk_out_bypdiv_f(0));
        gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);

        /*
         * Clear global bypass control; PLL is still under bypass, since SEL_VCO
         * is cleared by default.
         */
        data = gk20a_readl(g, trim_sys_bypassctrl_r());
        data = set_field(data, trim_sys_bypassctrl_gpcpll_m(),
                         trim_sys_bypassctrl_gpcpll_vco_f());
        gk20a_writel(g, trim_sys_bypassctrl_r(), data);

        /* Disable idle slow down */
        data = gk20a_readl(g, therm_clk_slowdown_r(0));
        data = set_field(data, therm_clk_slowdown_idle_factor_m(),
                         therm_clk_slowdown_idle_factor_disabled_f());
        gk20a_writel(g, therm_clk_slowdown_r(0), data);
        gk20a_readl(g, therm_clk_slowdown_r(0));

        return 0;
}
647
648 static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
649 {
650         struct clk_gk20a *clk = &g->clk;
651
652         if (freq > gpc_pll_params.max_freq)
653                 freq = gpc_pll_params.max_freq;
654         else if (freq < gpc_pll_params.min_freq)
655                 freq = gpc_pll_params.min_freq;
656
657         if (freq != old_freq) {
658                 /* gpc_pll.freq is changed to new value here */
659                 if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
660                                    &freq, true)) {
661                         gk20a_err(dev_from_gk20a(g),
662                                    "failed to set pll target for %d", freq);
663                         return -EINVAL;
664                 }
665         }
666         return 0;
667 }
668
/*
 * Program the PLL to clk->gpc_pll (already configured by set_pll_target).
 * If dynamic-slide programming fails and sliding was allowed, retry fully
 * under bypass. On success the settings are recorded in gpc_pll_last; on
 * failure the PLL is intentionally NOT restored (see comment below).
 * Called with clk_mutex held (see callers).
 */
static int set_pll_freq(struct gk20a *g, int allow_slide)
{
        struct clk_gk20a *clk = &g->clk;
        int err = 0;

        gk20a_dbg_fn("last freq: %dMHz, target freq %dMHz",
                     clk->gpc_pll_last.freq, clk->gpc_pll.freq);

        /* If programming with dynamic sliding failed, re-try under bypass */
        err = clk_program_gpc_pll(g, &clk->gpc_pll, allow_slide);
        if (err && allow_slide)
                err = clk_program_gpc_pll(g, &clk->gpc_pll, 0);

        if (!err) {
                clk->gpc_pll.enabled = true;
                clk->gpc_pll_last = clk->gpc_pll;
                return 0;
        }

        /*
         * Just report error but not restore PLL since dvfs could already change
         * voltage even when programming failed.
         */
        gk20a_err(dev_from_gk20a(g), "failed to set pll to %d",
                  clk->gpc_pll.freq);
        return err;
}
696
/*
 * Tegra clk-export set_rate hook. Converts the requested GPU rate to
 * gpc2clk units, retargets the PLL, and programs it when the PLL is
 * enabled and h/w is accessible. On success *rate is rewritten with the
 * actually achieved rate. Returns -ENODATA when rate is NULL, otherwise
 * the set_pll_* result.
 */
static int gm20b_clk_export_set_rate(void *data, unsigned long *rate)
{
        u32 old_freq;
        int ret = -ENODATA;
        struct gk20a *g = data;
        struct clk_gk20a *clk = &g->clk;

        if (rate) {
                mutex_lock(&clk->clk_mutex);
                old_freq = clk->gpc_pll.freq;
                ret = set_pll_target(g, rate_gpu_to_gpc2clk(*rate), old_freq);
                if (!ret && clk->gpc_pll.enabled && clk->clk_hw_on)
                        ret = set_pll_freq(g, 1);
                if (!ret)
                        *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
                mutex_unlock(&clk->clk_mutex);
        }
        return ret;
}
716
717 static int gm20b_clk_export_enable(void *data)
718 {
719         int ret = 0;
720         struct gk20a *g = data;
721         struct clk_gk20a *clk = &g->clk;
722
723         mutex_lock(&clk->clk_mutex);
724         if (!clk->gpc_pll.enabled && clk->clk_hw_on)
725                 ret = set_pll_freq(g, 1);
726         mutex_unlock(&clk->clk_mutex);
727         return ret;
728 }
729
730 static void gm20b_clk_export_disable(void *data)
731 {
732         struct gk20a *g = data;
733         struct clk_gk20a *clk = &g->clk;
734
735         mutex_lock(&clk->clk_mutex);
736         if (clk->gpc_pll.enabled && clk->clk_hw_on)
737                 clk_disable_gpcpll(g, 1);
738         mutex_unlock(&clk->clk_mutex);
739 }
740
741 static void gm20b_clk_export_init(void *data, unsigned long *rate, bool *state)
742 {
743         struct gk20a *g = data;
744         struct clk_gk20a *clk = &g->clk;
745
746         mutex_lock(&clk->clk_mutex);
747         if (state)
748                 *state = clk->gpc_pll.enabled;
749         if (rate)
750                 *rate = rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
751         mutex_unlock(&clk->clk_mutex);
752 }
753
/* Hooks exported to the Tegra clock framework; .data (the gk20a pointer)
 * is filled in by gm20b_clk_register_export_ops(). */
static struct tegra_clk_export_ops gm20b_clk_export_ops = {
        .init = gm20b_clk_export_init,
        .enable = gm20b_clk_export_enable,
        .disable = gm20b_clk_export_disable,
        .set_rate = gm20b_clk_export_set_rate,
};
760
761 static int gm20b_clk_register_export_ops(struct gk20a *g)
762 {
763         int ret;
764         struct clk *c;
765
766         if (gm20b_clk_export_ops.data)
767                 return 0;
768
769         gm20b_clk_export_ops.data = (void *)g;
770         c = g->clk.tegra_clk;
771         if (!c || !clk_get_parent(c))
772                 return -ENOSYS;
773
774         ret = tegra_clk_register_export_ops(clk_get_parent(c),
775                                             &gm20b_clk_export_ops);
776
777         return ret;
778 }
779
780 static int gm20b_init_clk_support(struct gk20a *g)
781 {
782         struct clk_gk20a *clk = &g->clk;
783         u32 err;
784
785         gk20a_dbg_fn("");
786
787         clk->g = g;
788
789         err = gm20b_init_clk_reset_enable_hw(g);
790         if (err)
791                 return err;
792
793         err = gm20b_init_clk_setup_sw(g);
794         if (err)
795                 return err;
796
797         mutex_lock(&clk->clk_mutex);
798         clk->clk_hw_on = true;
799
800         err = gm20b_init_clk_setup_hw(g);
801         mutex_unlock(&clk->clk_mutex);
802         if (err)
803                 return err;
804
805         err = gm20b_clk_register_export_ops(g);
806         if (err)
807                 return err;
808
809         /* FIXME: this effectively prevents host level clock gating */
810         err = clk_enable(g->clk.tegra_clk);
811         if (err)
812                 return err;
813
814         /* The prev call may not enable PLL if gbus is unbalanced - force it */
815         mutex_lock(&clk->clk_mutex);
816         if (!clk->gpc_pll.enabled)
817                 err = set_pll_freq(g, 1);
818         mutex_unlock(&clk->clk_mutex);
819         if (err)
820                 return err;
821
822 #ifdef CONFIG_DEBUG_FS
823         if (!clk->debugfs_set) {
824                 if (!clk_gm20b_debugfs_init(g))
825                         clk->debugfs_set = true;
826         }
827 #endif
828         return err;
829 }
830
831 static int gm20b_suspend_clk_support(struct gk20a *g)
832 {
833         int ret = 0;
834
835         clk_disable(g->clk.tegra_clk);
836
837         /* The prev call may not disable PLL if gbus is unbalanced - force it */
838         mutex_lock(&g->clk.clk_mutex);
839         if (g->clk.gpc_pll.enabled)
840                 ret = clk_disable_gpcpll(g, 1);
841         g->clk.clk_hw_on = false;
842         mutex_unlock(&g->clk.clk_mutex);
843         return ret;
844 }
845
846 void gm20b_init_clk_ops(struct gpu_ops *gops)
847 {
848         gops->clk.init_clk_support = gm20b_init_clk_support;
849         gops->clk.suspend_clk_support = gm20b_suspend_clk_support;
850 }
851
852 #ifdef CONFIG_DEBUG_FS
853
854 static int rate_get(void *data, u64 *val)
855 {
856         struct gk20a *g = (struct gk20a *)data;
857         *val = (u64)gk20a_clk_get_rate(g);
858         return 0;
859 }
860 static int rate_set(void *data, u64 val)
861 {
862         struct gk20a *g = (struct gk20a *)data;
863         return gk20a_clk_set_rate(g, (u32)val);
864 }
865 DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
866
867 static int pll_reg_show(struct seq_file *s, void *data)
868 {
869         struct gk20a *g = s->private;
870         u32 reg, m, n, pl, f;
871
872         mutex_lock(&g->clk.clk_mutex);
873         if (!g->clk.clk_hw_on) {
874                 seq_printf(s, "gk20a powered down - no access to registers\n");
875                 mutex_unlock(&g->clk.clk_mutex);
876                 return 0;
877         }
878
879         reg = gk20a_readl(g, trim_sys_bypassctrl_r());
880         seq_printf(s, "bypassctrl = %s, ", reg ? "bypass" : "vco");
881         reg = gk20a_readl(g, trim_sys_sel_vco_r());
882         seq_printf(s, "sel_vco = %s, ", reg ? "vco" : "bypass");
883
884         reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
885         seq_printf(s, "cfg  = 0x%x : %s : %s : %s\n", reg,
886                 trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled",
887                 trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked",
888                 trim_sys_gpcpll_cfg_sync_mode_v(reg) ? "sync_on" : "sync_off");
889
890         reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
891         m = trim_sys_gpcpll_coeff_mdiv_v(reg);
892         n = trim_sys_gpcpll_coeff_ndiv_v(reg);
893         pl = trim_sys_gpcpll_coeff_pldiv_v(reg);
894         f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div(pl));
895         seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
896         seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
897         mutex_unlock(&g->clk.clk_mutex);
898         return 0;
899 }
900
/* debugfs open for "pll_reg": route the gk20a pointer to pll_reg_show(). */
static int pll_reg_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_reg_show, inode->i_private);
}
905
/* Read-only seq_file fops for the "pll_reg" debugfs node. */
static const struct file_operations pll_reg_fops = {
	.open		= pll_reg_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
912
913 static int pll_reg_raw_show(struct seq_file *s, void *data)
914 {
915         struct gk20a *g = s->private;
916         u32 reg;
917
918         mutex_lock(&g->clk.clk_mutex);
919         if (!g->clk.clk_hw_on) {
920                 seq_puts(s, "gk20a powered down - no access to registers\n");
921                 mutex_unlock(&g->clk.clk_mutex);
922                 return 0;
923         }
924
925         seq_puts(s, "GPCPLL REGISTERS:\n");
926         for (reg = trim_sys_gpcpll_cfg_r(); reg <= trim_sys_gpcpll_dvfs2_r();
927               reg += sizeof(u32))
928                 seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
929
930         seq_puts(s, "\nGPC CLK OUT REGISTERS:\n");
931
932         reg = trim_sys_sel_vco_r();
933         seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
934         reg = trim_sys_gpc2clk_out_r();
935         seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
936         reg = trim_sys_bypassctrl_r();
937         seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
938
939         mutex_unlock(&g->clk.clk_mutex);
940         return 0;
941 }
942
/* debugfs open for "pll_reg_raw": route gk20a pointer to the raw dump. */
static int pll_reg_raw_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_reg_raw_show, inode->i_private);
}
947
948 static ssize_t pll_reg_raw_write(struct file *file,
949         const char __user *userbuf, size_t count, loff_t *ppos)
950 {
951         struct gk20a *g = file->f_path.dentry->d_inode->i_private;
952         char buf[80];
953         u32 reg, val;
954
955         if (sizeof(buf) <= count)
956                 return -EINVAL;
957
958         if (copy_from_user(buf, userbuf, count))
959                 return -EFAULT;
960
961         /* terminate buffer and trim - white spaces may be appended
962          *  at the end when invoked from shell command line */
963         buf[count] = '\0';
964         strim(buf);
965
966         if (sscanf(buf, "[0x%x] = 0x%x", &reg, &val) != 2)
967                 return -EINVAL;
968
969         if (((reg < trim_sys_gpcpll_cfg_r()) ||
970             (reg > trim_sys_gpcpll_dvfs2_r())) &&
971             (reg != trim_sys_sel_vco_r()) &&
972             (reg != trim_sys_gpc2clk_out_r()) &&
973             (reg != trim_sys_bypassctrl_r()))
974                 return -EPERM;
975
976         mutex_lock(&g->clk.clk_mutex);
977         if (!g->clk.clk_hw_on) {
978                 mutex_unlock(&g->clk.clk_mutex);
979                 return -EBUSY;
980         }
981         gk20a_writel(g, reg, val);
982         mutex_unlock(&g->clk.clk_mutex);
983         return count;
984 }
985
/* Read/write fops for the "pll_reg_raw" debugfs node. */
static const struct file_operations pll_reg_raw_fops = {
	.open		= pll_reg_raw_open,
	.read		= seq_read,
	.write		= pll_reg_raw_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
993
/*
 * debugfs "monitor" read: measure the actual GPC clock frequency with
 * the on-chip clock counter.  The counter counts GPCCLK edges for a
 * programmed number of reference-clock (clk_in) cycles, so
 * freq = clk_in * count / ncycle.
 *
 * The register write/read/delay sequence below is order-sensitive -
 * do not reorder.
 */
static int monitor_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	struct clk_gk20a *clk = &g->clk;
	u32 clk_slowdown, clk_slowdown_save;
	int err;

	u32 ncycle = 800; /* count GPCCLK for ncycle of clkin */
	u64 freq = clk->gpc_pll.clk_in;
	u32 count1, count2;

	/* power up the GPU for the duration of the measurement */
	err = gk20a_busy(g->dev);
	if (err)
		return err;

	mutex_lock(&g->clk.clk_mutex);

	/* Disable clock slowdown during measurements */
	clk_slowdown_save = gk20a_readl(g, therm_clk_slowdown_r(0));
	clk_slowdown = set_field(clk_slowdown_save,
				 therm_clk_slowdown_idle_factor_m(),
				 therm_clk_slowdown_idle_factor_disabled_f());
	gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown);
	gk20a_readl(g, therm_clk_slowdown_r(0));

	/* reset the counter, then arm it for ncycle reference cycles */
	gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
		     trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
	gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
		     trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
		     trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
		     trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
	/* start */

	/* It should take less than 25us to finish 800 cycle of 38.4MHz.
	   But longer than 100us delay is required here. */
	gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
	udelay(200);

	/* read the count twice; a mismatch means it had not settled yet */
	count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
	udelay(100);
	count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
	freq *= trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2);
	do_div(freq, ncycle);
	*val = freq;

	/* Restore clock slowdown */
	gk20a_writel(g, therm_clk_slowdown_r(0), clk_slowdown_save);
	mutex_unlock(&g->clk.clk_mutex);

	gk20a_idle(g->dev);

	if (count1 != count2)
		return -EBUSY;
	return 0;
}
/* "monitor" debugfs node: read-only measured frequency, printed as %llu */
DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
1050
1051 static int clk_gm20b_debugfs_init(struct gk20a *g)
1052 {
1053         struct dentry *d;
1054         struct gk20a_platform *platform = platform_get_drvdata(g->dev);
1055
1056         d = debugfs_create_file(
1057                 "rate", S_IRUGO|S_IWUSR, platform->debugfs, g, &rate_fops);
1058         if (!d)
1059                 goto err_out;
1060
1061         d = debugfs_create_file(
1062                 "pll_reg", S_IRUGO, platform->debugfs, g, &pll_reg_fops);
1063         if (!d)
1064                 goto err_out;
1065
1066         d = debugfs_create_file("pll_reg_raw",
1067                 S_IRUGO, platform->debugfs, g, &pll_reg_raw_fops);
1068         if (!d)
1069                 goto err_out;
1070
1071         d = debugfs_create_file(
1072                 "monitor", S_IRUGO, platform->debugfs, g, &monitor_fops);
1073         if (!d)
1074                 goto err_out;
1075
1076         return 0;
1077
1078 err_out:
1079         pr_err("%s: Failed to make debugfs node\n", __func__);
1080         debugfs_remove_recursive(platform->debugfs);
1081         return -ENOMEM;
1082 }
1083
1084 #endif /* CONFIG_DEBUG_FS */