gpu: nvgpu: Add GPCPLL DVFS state to debug prints
[linux-3.10.git] / drivers / gpu / nvgpu / gm20b / ltc_gm20b.c
1 /*
2  * GM20B L2
3  *
4  * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  */
15
16 #include <linux/types.h>
17 #include <linux/jiffies.h>
18
19 #include "hw_mc_gm20b.h"
20 #include "hw_ltc_gm20b.h"
21 #include "hw_top_gm20b.h"
22 #include "hw_proj_gm20b.h"
23 #include "hw_pri_ringmaster_gm20b.h"
24
25 #include "gk20a/ltc_common.c"
26 #include "gk20a/gk20a.h"
27 #include "gk20a/gk20a_allocator.h"
28
29 static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
30 {
31         /* max memory size (MB) to cover */
32         u32 max_size = gr->max_comptag_mem;
33         /* one tag line covers 128KB */
34         u32 max_comptag_lines = max_size << 3;
35
36         u32 hw_max_comptag_lines =
37                 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();
38
39         u32 cbc_param =
40                 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
41         u32 comptags_per_cacheline =
42                 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
43         u32 cacheline_size =
44                 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
45         u32 slices_per_ltc =
46                 ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);
47
48         u32 compbit_backing_size;
49
50         int err;
51
52         gk20a_dbg_fn("");
53
54         if (max_comptag_lines == 0) {
55                 gr->compbit_store.size = 0;
56                 return 0;
57         }
58
59         if (max_comptag_lines > hw_max_comptag_lines)
60                 max_comptag_lines = hw_max_comptag_lines;
61
62         compbit_backing_size =
63                 DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
64                 cacheline_size * slices_per_ltc * g->ltc_count;
65
66         /* aligned to 2KB * ltc_count */
67         compbit_backing_size +=
68                 g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
69
70         /* must be a multiple of 64KB */
71         compbit_backing_size = roundup(compbit_backing_size, 64*1024);
72
73         max_comptag_lines =
74                 (compbit_backing_size * comptags_per_cacheline) /
75                 (cacheline_size * slices_per_ltc * g->ltc_count);
76
77         if (max_comptag_lines > hw_max_comptag_lines)
78                 max_comptag_lines = hw_max_comptag_lines;
79
80         gk20a_dbg_info("compbit backing store size : %d",
81                 compbit_backing_size);
82         gk20a_dbg_info("max comptag lines : %d",
83                 max_comptag_lines);
84
85         if (tegra_platform_is_linsim())
86                 err = gk20a_ltc_alloc_phys_cbc(g, compbit_backing_size);
87         else
88                 err = gk20a_ltc_alloc_virt_cbc(g, compbit_backing_size);
89
90         if (err)
91                 return err;
92
93         gk20a_allocator_init(&gr->comp_tags, "comptag",
94                               1, /* start */
95                               max_comptag_lines - 1, /* length*/
96                               1); /* align */
97
98         gr->comptags_per_cacheline = comptags_per_cacheline;
99         gr->slices_per_ltc = slices_per_ltc;
100         gr->cacheline_size = cacheline_size;
101
102         return 0;
103 }
104
105 static int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
106                               u32 min, u32 max)
107 {
108         int err = 0;
109         struct gr_gk20a *gr = &g->gr;
110         u32 ltc, slice, ctrl1, val, hw_op = 0;
111         unsigned long end_jiffies = jiffies +
112                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
113         u32 delay = GR_IDLE_CHECK_DEFAULT;
114         u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
115                                 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
116
117         gk20a_dbg_fn("");
118
119         if (gr->compbit_store.size == 0)
120                 return 0;
121
122         mutex_lock(&g->mm.l2_op_lock);
123
124         if (op == gk20a_cbc_op_clear) {
125                 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
126                         ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
127                 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
128                         ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
129                 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
130         } else if (op == gk20a_cbc_op_clean) {
131                 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
132         } else if (op == gk20a_cbc_op_invalidate) {
133                 hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
134         } else {
135                 BUG_ON(1);
136         }
137         gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
138                      gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);
139
140         for (ltc = 0; ltc < g->ltc_count; ltc++) {
141                 for (slice = 0; slice < slices_per_ltc; slice++) {
142
143                         delay = GR_IDLE_CHECK_DEFAULT;
144
145                         ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
146                                 ltc * proj_ltc_stride_v() +
147                                 slice * proj_lts_stride_v();
148
149                         do {
150                                 val = gk20a_readl(g, ctrl1);
151                                 if (!(val & hw_op))
152                                         break;
153
154                                 usleep_range(delay, delay * 2);
155                                 delay = min_t(u32, delay << 1,
156                                         GR_IDLE_CHECK_MAX);
157
158                         } while (time_before(jiffies, end_jiffies) |
159                                         !tegra_platform_is_silicon());
160
161                         if (!time_before(jiffies, end_jiffies)) {
162                                 gk20a_err(dev_from_gk20a(g),
163                                            "comp tag clear timeout\n");
164                                 err = -EBUSY;
165                                 goto out;
166                         }
167                 }
168         }
169 out:
170         mutex_unlock(&g->mm.l2_op_lock);
171         return 0;
172 }
173
174 static void gm20b_ltc_init_fs_state(struct gk20a *g)
175 {
176         u32 reg;
177
178         gk20a_dbg_info("initialize gm20b l2");
179
180         g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r());
181         g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r());
182         gk20a_dbg_info("%d ltcs out of %d", g->ltc_count, g->max_ltc_count);
183
184         gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(),
185         g->ltc_count);
186         gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(),
187         g->ltc_count);
188
189         gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(),
190                      gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) |
191                      ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m());
192
193         /* Disable LTC interrupts */
194         reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r());
195         reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m();
196         reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m();
197         gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg);
198 }
199
200 static void gm20b_ltc_isr(struct gk20a *g)
201 {
202         u32 mc_intr, ltc_intr;
203         int ltc, slice;
204
205         mc_intr = gk20a_readl(g, mc_intr_ltc_r());
206         gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x",
207                   mc_intr);
208         for (ltc = 0; ltc < g->ltc_count; ltc++) {
209                 if ((mc_intr & 1 << ltc) == 0)
210                         continue;
211                 for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
212                         ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
213                                            proj_ltc_stride_v() * ltc +
214                                            proj_lts_stride_v() * slice);
215                         gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
216                                   ltc, slice, ltc_intr);
217                         gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
218                                            proj_ltc_stride_v() * ltc +
219                                            proj_lts_stride_v() * slice,
220                                      ltc_intr);
221                 }
222         }
223 }
224
225 static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
226 {
227         u32 data;
228         bool done[g->ltc_count];
229         s32 retry = 100;
230         int i;
231         int num_done = 0;
232         u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();
233
234         gk20a_dbg_fn("");
235
236         for (i = 0; i < g->ltc_count; i++)
237                 done[i] = 0;
238
239         gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
240                      ltc_ltcs_ltss_g_elpg_flush_pending_f());
241         do {
242                 for (i = 0; i < g->ltc_count; i++) {
243                         if (done[i])
244                                 continue;
245
246                         data = gk20a_readl(g,
247                                         ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);
248
249                         if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
250                                 gk20a_dbg_info("g_elpg_flush 0x%x", data);
251                         } else {
252                                 done[i] = 1;
253                                 num_done++;
254                         }
255                 }
256
257                 if (num_done < g->ltc_count) {
258                         retry--;
259                         usleep_range(20, 40);
260                 } else
261                         break;
262         } while (retry >= 0 || !tegra_platform_is_silicon());
263
264         if (retry < 0)
265                 gk20a_warn(dev_from_gk20a(g),
266                             "g_elpg_flush too many retries");
267 }
268
269 static u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
270 {
271         u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
272         if (val == 2) {
273                 return base * 2;
274         } else if (val != 1) {
275                 gk20a_err(dev_from_gk20a(g),
276                         "Invalid number of active ltcs: %08x\n", val);
277         }
278
279         return base;
280 }
281
282 /*
283  * Performs a full flush of the L2 cache.
284  */
285 static void gm20b_flush_ltc(struct gk20a *g)
286 {
287         u32 op_pending;
288         unsigned long now, timeout;
289
290 #define __timeout_init()                                \
291         do {                                            \
292                 now = jiffies;  timeout = now + HZ;     \
293         } while (0)
294 #define __timeout_check()                                               \
295         do {                                                            \
296                 if (tegra_platform_is_silicon() && time_after(now, timeout)) { \
297                         gk20a_err(dev_from_gk20a(g), "L2 flush timeout!"); \
298                         break;                                          \
299                 }                                                       \
300         } while (0)
301
302         /* Clean... */
303         gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(),
304                 ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() |
305                 ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() |
306                 ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() |
307                 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() |
308                 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() |
309                 ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f());
310
311         /* Wait on each LTC individually. */
312         __timeout_init();
313         do {
314                 op_pending = gk20a_readl(g, ltc_ltc0_ltss_tstg_cmgmt1_r());
315                 __timeout_check();
316         } while (op_pending &
317                  ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f());
318
319         __timeout_init();
320         do {
321                 op_pending = gk20a_readl(g, ltc_ltc1_ltss_tstg_cmgmt1_r());
322                 __timeout_check();
323         } while (op_pending &
324                  ltc_ltc1_ltss_tstg_cmgmt1_clean_pending_f());
325
326         /* And invalidate. */
327         gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt0_r(),
328              ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() |
329              ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() |
330              ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() |
331              ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() |
332              ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f());
333
334         /* Wait on each LTC individually. */
335         __timeout_init();
336         do {
337                 op_pending = gk20a_readl(g, ltc_ltc0_ltss_tstg_cmgmt0_r());
338                 __timeout_check();
339         } while (op_pending &
340                  ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f());
341
342         __timeout_init();
343         do {
344                 op_pending = gk20a_readl(g, ltc_ltc1_ltss_tstg_cmgmt0_r());
345                 __timeout_check();
346         } while (op_pending &
347                  ltc_ltc1_ltss_tstg_cmgmt0_invalidate_pending_f());
348 }
349
350 static int gm20b_determine_L2_size_bytes(struct gk20a *g)
351 {
352         u32 lts_per_ltc;
353         u32 ways;
354         u32 sets;
355         u32 bytes_per_line;
356         u32 active_ltcs;
357         u32 cache_size;
358
359         u32 tmp;
360         u32 active_sets_value;
361
362         tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
363         ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));
364
365         active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
366         if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
367                 sets = 64;
368         } else if (active_sets_value ==
369                  ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
370                 sets = 32;
371         } else if (active_sets_value ==
372                  ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
373                 sets = 16;
374         } else {
375                 dev_err(dev_from_gk20a(g),
376                         "Unknown constant %u for active sets",
377                        (unsigned)active_sets_value);
378                 sets = 0;
379         }
380
381         active_ltcs = g->gr.num_fbps;
382
383         /* chip-specific values */
384         lts_per_ltc = 2;
385         bytes_per_line = 128;
386         cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;
387
388         return cache_size;
389 }
390
391 void gm20b_init_ltc(struct gpu_ops *gops)
392 {
393         /* Gk20a reused ops. */
394         gops->ltc.determine_L2_size_bytes = gm20b_determine_L2_size_bytes;
395         gops->ltc.set_max_ways_evict_last = gk20a_ltc_set_max_ways_evict_last;
396         gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry;
397         gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry;
398         gops->ltc.init_cbc = gk20a_ltc_init_cbc;
399
400         /* GM20b specific ops. */
401         gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
402         gops->ltc.init_comptags = gm20b_ltc_init_comptags;
403         gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
404         gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
405         gops->ltc.isr = gm20b_ltc_isr;
406         gops->ltc.cbc_fix_config = gm20b_ltc_cbc_fix_config;
407         gops->ltc.flush = gm20b_flush_ltc;
408 #ifdef CONFIG_DEBUG_FS
409         gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs;
410 #endif
411 }