/*
 * drivers/video/tegra/host/gk20a/gr_gk20a.c
 *
 * Copyright (c) 2011-2013, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */
#include <linux/delay.h>	/* for udelay */
#include <linux/mm.h>		/* for totalram_pages */
#include <linux/scatterlist.h>
#include <linux/nvmap.h>

#include <mach/hardware.h>

#include "gk20a.h"		/* core gk20a types; part of an elided include block */
#include "gr_ctx_gk20a.h"

#include "hw_ccsr_gk20a.h"
#include "hw_ctxsw_prog_gk20a.h"
#include "hw_fifo_gk20a.h"
#include "hw_gr_gk20a.h"
#include "hw_mc_gk20a.h"
#include "hw_ram_gk20a.h"
#include "hw_pri_ringmaster_gk20a.h"
#include "hw_pri_ringstation_sys_gk20a.h"
#include "hw_pri_ringstation_gpc_gk20a.h"
#include "hw_pri_ringstation_fbp_gk20a.h"
#include "hw_proj_gk20a.h"
#include "hw_top_gk20a.h"
#include "hw_ltc_gk20a.h"
#include "hw_fb_gk20a.h"
#include "hw_therm_gk20a.h"
#include "hw_pbdma_gk20a.h"
#include "chip_support.h"
#include "nvhost_memmgr.h"
#include "gk20a_gating_reglist.h"
static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
static int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_gk20a *c,
				    u32 addr, u32 data, u32 patch);

/* global ctx buffer */
static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g);
static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
					   struct channel_gk20a *c);
static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c);

/* channel gr ctx buffer */
static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
					 struct channel_gk20a *c);
static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);

/* channel patch ctx buffer */
static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
					    struct channel_gk20a *c);
static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);

/* golden ctx image */
static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
					  struct channel_gk20a *c);
static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
					  struct channel_gk20a *c);
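
/*
 * Ucode loading: both context-switch falcons (FECS for the front end,
 * GPCCS for the GPCs) are loaded by streaming their data and instruction
 * segments through the DMEMC/DMEMD and IMEMC/IMEMD access registers.
 * With the auto-increment-on-write bit (aincw) set in the *C register,
 * each write to the matching *D register stores one word and advances
 * the internal offset, so a whole segment is a simple write loop.
 */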
static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
{
	u32 i, ucode_u32_size;
	const u32 *ucode_u32_data;
	u32 checksum;

	gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
					      gr_gpccs_dmemc_blk_f(0) |
					      gr_gpccs_dmemc_aincw_f(1)));

	ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
	ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;

	for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
		gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
		checksum += ucode_u32_data[i];
	}

	gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
					     gr_fecs_dmemc_blk_f(0) |
					     gr_fecs_dmemc_aincw_f(1)));

	ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
	ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;

	for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
		gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
		checksum += ucode_u32_data[i];
	}
	nvhost_dbg_fn("done");
}
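
/*
 * IMEM is loaded the same way as DMEM, with one extra twist: falcon
 * instruction memory is tagged in 256-byte blocks. A tag is written to
 * the IMEMT register before each block, and the loops below bump the
 * tag every 256 bytes (64 words). After the payload, the rest of the
 * current block plus one extra block is zero-padded under fresh tags.
 */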
static void gr_gk20a_load_falcon_imem(struct gk20a *g)
{
	u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
	const u32 *ucode_u32_data;
	u32 tag, i, pad_start, pad_end;
	u32 checksum;

	cfg = gk20a_readl(g, gr_fecs_cfg_r());
	fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);

	cfg = gk20a_readl(g, gr_gpc0_cfg_r());
	gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);

	/* Use the broadcast address to access all of the GPCCS units. */
	gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
					      gr_gpccs_imemc_blk_f(0) |
					      gr_gpccs_imemc_aincw_f(1)));

	/* Setup the tags for the instruction memory. */
	tag = 0;
	gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));

	ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
	ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;

	for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
		if (i && ((i % (256/sizeof(u32))) == 0)) {
			tag++;
			gk20a_writel(g, gr_gpccs_imemt_r(0),
				     gr_gpccs_imemt_tag_f(tag));
		}
		gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
		checksum += ucode_u32_data[i];
	}

	pad_start = i * 4;
	pad_end = pad_start + (256 - pad_start % 256) + 256;
	for (i = pad_start;
	     (i < gpccs_imem_size * 256) && (i < pad_end);
	     i += 4) {
		if (i && ((i % 256) == 0)) {
			tag++;
			gk20a_writel(g, gr_gpccs_imemt_r(0),
				     gr_gpccs_imemt_tag_f(tag));
		}
		gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
	}

	gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
					     gr_fecs_imemc_blk_f(0) |
					     gr_fecs_imemc_aincw_f(1)));

	/* Setup the tags for the instruction memory. */
	tag = 0;
	gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));

	ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
	ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;

	for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
		if (i && ((i % (256/sizeof(u32))) == 0)) {
			tag++;
			gk20a_writel(g, gr_fecs_imemt_r(0),
				     gr_fecs_imemt_tag_f(tag));
		}
		gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
		checksum += ucode_u32_data[i];
	}

	pad_start = i * 4;
	pad_end = pad_start + (256 - pad_start % 256) + 256;
	for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) {
		if (i && ((i % 256) == 0)) {
			tag++;
			gk20a_writel(g, gr_fecs_imemt_r(0),
				     gr_fecs_imemt_tag_f(tag));
		}
		gk20a_writel(g, gr_fecs_imemd_r(0), 0);
	}
}
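
/*
 * Idle wait: poll until either PGRAPH is disabled in mc_enable, or the
 * engine reports neither a context switch in progress nor a busy
 * status. The poll interval backs off exponentially, capped at
 * GR_IDLE_CHECK_MAX, until end_jiffies expires.
 */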
static int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
			      u32 expect_delay)
{
	u32 delay = expect_delay;
	bool gr_enabled;
	bool ctxsw_active;
	bool gr_busy;

	do {
		/* fmodel: host gets fifo_engine_status(gr) from gr
		   only when gr_status is read */
		gk20a_readl(g, gr_status_r());

		gr_enabled = gk20a_readl(g, mc_enable_r()) &
			mc_enable_pgraph_enabled_f();

		ctxsw_active = gk20a_readl(g,
			fifo_engine_status_r(ENGINE_GR_GK20A)) &
			fifo_engine_status_ctxsw_in_progress_f();

		gr_busy = gk20a_readl(g, gr_engine_status_r()) &
			gr_engine_status_value_busy_f();

		if (!gr_enabled || (!gr_busy && !ctxsw_active)) {
			nvhost_dbg_fn("done");
			return 0;
		}

		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);

	} while (time_before(jiffies, end_jiffies));

	nvhost_err(dev_from_gk20a(g),
		   "timeout, ctxsw busy : %d, gr busy : %d",
		   ctxsw_active, gr_busy);

	return -EAGAIN;
}
static int gr_gk20a_ctx_reset(struct gk20a *g, u32 rst_mask)
{
	/* FE_PWR_MODE_MODE_FORCE_ON for RTLSim and EMulation? */

	gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), rst_mask);

	gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
		     gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
		     gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
		     gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());

	/* Delay for > 10 nvclks after writing reset. */
	gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());

	gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
		     gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
		     gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());

	/* Delay for > 10 nvclks after writing reset. */
	gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());

	/* FE_PWR_MODE_MODE_AUTO for RTLSim and EMulation? */

	return 0;
}
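
/*
 * FECS methods are submitted by writing an argument to the method data
 * register and an address to the method push register, then polling a
 * ctxsw mailbox until the ucode posts a result. The GR_IS_UCODE_OP_*
 * opcodes tell the poll loop how to compare the mailbox value against
 * the expected success and failure patterns.
 */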
static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
				   u32 *mailbox_ret, u32 opc_success,
				   u32 mailbox_ok, u32 opc_fail,
				   u32 mailbox_fail)
{
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	u32 check = WAIT_UCODE_LOOP;
	u32 reg;

	while (check == WAIT_UCODE_LOOP) {
		if (!time_before(jiffies, end_jiffies) &&
		    tegra_platform_is_silicon())
			check = WAIT_UCODE_TIMEOUT;

		reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id));

		if (mailbox_ret)
			*mailbox_ret = reg;

		switch (opc_success) {
		case GR_IS_UCODE_OP_EQUAL:
			if (reg == mailbox_ok)
				check = WAIT_UCODE_OK;
			break;
		case GR_IS_UCODE_OP_NOT_EQUAL:
			if (reg != mailbox_ok)
				check = WAIT_UCODE_OK;
			break;
		case GR_IS_UCODE_OP_AND:
			if (reg & mailbox_ok)
				check = WAIT_UCODE_OK;
			break;
		case GR_IS_UCODE_OP_LESSER:
			if (reg < mailbox_ok)
				check = WAIT_UCODE_OK;
			break;
		case GR_IS_UCODE_OP_LESSER_EQUAL:
			if (reg <= mailbox_ok)
				check = WAIT_UCODE_OK;
			break;
		case GR_IS_UCODE_OP_SKIP:
			/* do no success check */
			break;
		default:
			nvhost_err(dev_from_gk20a(g),
				   "invalid success opcode 0x%x", opc_success);

			check = WAIT_UCODE_ERROR;
			break;
		}

		switch (opc_fail) {
		case GR_IS_UCODE_OP_EQUAL:
			if (reg == mailbox_fail)
				check = WAIT_UCODE_ERROR;
			break;
		case GR_IS_UCODE_OP_NOT_EQUAL:
			if (reg != mailbox_fail)
				check = WAIT_UCODE_ERROR;
			break;
		case GR_IS_UCODE_OP_AND:
			if (reg & mailbox_fail)
				check = WAIT_UCODE_ERROR;
			break;
		case GR_IS_UCODE_OP_LESSER:
			if (reg < mailbox_fail)
				check = WAIT_UCODE_ERROR;
			break;
		case GR_IS_UCODE_OP_LESSER_EQUAL:
			if (reg <= mailbox_fail)
				check = WAIT_UCODE_ERROR;
			break;
		case GR_IS_UCODE_OP_SKIP:
			/* do no check on fail*/
			break;
		default:
			nvhost_err(dev_from_gk20a(g),
				   "invalid fail opcode 0x%x", opc_fail);
			check = WAIT_UCODE_ERROR;
			break;
		}

		usleep_range(delay, delay * 2);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	}

	if (check == WAIT_UCODE_TIMEOUT) {
		nvhost_err(dev_from_gk20a(g),
			   "timeout waiting on ucode response");
		return -1;
	} else if (check == WAIT_UCODE_ERROR) {
		nvhost_err(dev_from_gk20a(g),
			   "ucode method failed on mailbox=%d value=0x%08x",
			   mailbox_id, reg);
		return -1;
	}

	nvhost_dbg_fn("done");
	return 0;
}
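
/*
 * Single entry point for pushing a FECS method: seed the mailbox with
 * mb_data, clear the bits in mb_clr, push the method, then wait for the
 * ucode to report success or failure per the opcodes above. The
 * fecs_mutex serializes method submission across channels.
 */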
int gr_gk20a_submit_fecs_method(struct gk20a *g,
				u32 mb_id, u32 mb_data, u32 mb_clr,
				u32 mtd_data, u32 mtd_adr, u32 *mb_ret,
				u32 opc_ok, u32 mb_ok, u32 opc_fail, u32 mb_fail)
{
	struct gr_gk20a *gr = &g->gr;
	int ret;

	mutex_lock(&gr->fecs_mutex);

	gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(mb_id),
		     gr_fecs_ctxsw_mailbox_value_f(mb_data));

	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
		     gr_fecs_ctxsw_mailbox_clear_value_f(mb_clr));

	gk20a_writel(g, gr_fecs_method_data_r(), mtd_data);
	gk20a_writel(g, gr_fecs_method_push_r(),
		     gr_fecs_method_push_adr_f(mtd_adr));

	ret = gr_gk20a_ctx_wait_ucode(g, 0, mb_ret,
				      opc_ok, mb_ok, opc_fail, mb_fail);

	mutex_unlock(&gr->fecs_mutex);

	return ret;
}
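
/*
 * A channel's instance block holds the "graphics WFI" pointer that tells
 * hardware where the channel's gr context lives in its GPU VA space.
 * Committing a new context is therefore two 32-bit writes into the
 * instance block; the FB/L2 flushes around them keep the GPU's view of
 * the CPU-written block coherent.
 */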
static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
{
	u32 addr_lo;
	u32 addr_hi;
	void *inst_ptr = NULL;
	int ret = 0;

	/* flush gpu_va before commit */
	gk20a_mm_fb_flush(c->g);
	gk20a_mm_l2_flush(c->g, true);

	inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
	if (!inst_ptr) {
		ret = -ENOMEM;
		goto clean_up;
	}

	addr_lo = u64_lo32(gpu_va) >> 12;
	addr_hi = u64_hi32(gpu_va);

	mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(),
		 ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
		 ram_in_gr_wfi_ptr_lo_f(addr_lo));

	mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(),
		 ram_in_gr_wfi_ptr_hi_f(addr_hi));

	nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);

	gk20a_mm_l2_invalidate(c->g);

	return 0;

clean_up:
	/* error path partially elided in source; unmap only if mapped */
	if (inst_ptr)
		nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
	return ret;
}
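
/*
 * Context "patching": rather than poking a context register directly,
 * an (addr, data) pair can be appended to the channel's patch buffer
 * (two words per entry, hence data_count * 2 below). The ctxsw ucode
 * replays these pairs into the context image on restore, which is how
 * per-channel state is applied while the channel is not resident. With
 * patch == 0 the write goes straight to the register instead.
 */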
static int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_gk20a *c,
				    u32 addr, u32 data, u32 patch)
{
	struct channel_ctx_gk20a *ch_ctx;
	u32 patch_slot = 0;
	void *patch_ptr = NULL;

	BUG_ON(patch != 0 && c == NULL);

	if (patch) {
		ch_ctx = &c->ch_ctx;
		patch_ptr = nvhost_memmgr_mmap(ch_ctx->patch_ctx.mem.ref);
		if (!patch_ptr)
			return -ENOMEM;

		patch_slot = ch_ctx->patch_ctx.data_count * 2;

		mem_wr32(patch_ptr, patch_slot++, addr);
		mem_wr32(patch_ptr, patch_slot++, data);

		nvhost_memmgr_munmap(ch_ctx->patch_ctx.mem.ref, patch_ptr);
		gk20a_mm_l2_invalidate(g);

		ch_ctx->patch_ctx.data_count++;
	} else {
		gk20a_writel(g, addr, data);
	}

	return 0;
}
static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
					  struct channel_gk20a *c)
{
	u32 inst_base_ptr = u64_lo32(sg_phys(c->inst_block.mem.sgt->sgl)
				     >> ram_in_base_shift_v());
	int ret;

	nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
			c->hw_chid, inst_base_ptr);

	ret = gr_gk20a_submit_fecs_method(g, 0, 0, 0x30,
			gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
			gr_fecs_current_ctx_target_vid_mem_f() |
			gr_fecs_current_ctx_valid_f(1),
			gr_fecs_method_push_adr_bind_pointer_f(),
			0, GR_IS_UCODE_OP_AND, 0x10, GR_IS_UCODE_OP_AND, 0x20);
	if (ret)
		nvhost_err(dev_from_gk20a(g),
			   "bind channel instance failed");

	return ret;
}
static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
				    bool disable_fifo /* assumed parameter; elided in source */)
{
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
	u32 va_lo, va_hi, va;
	int ret = 0;
	void *ctx_ptr = NULL;

	ctx_ptr = nvhost_memmgr_mmap(ch_ctx->gr_ctx.mem.ref);
	if (!ctx_ptr)
		return -ENOMEM;

	if (ch_ctx->zcull_ctx.gpu_va == 0 &&
	    ch_ctx->zcull_ctx.ctx_sw_mode ==
		ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
		ret = -EINVAL;
		goto clean_up;
	}

	va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
	va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
	va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);

	if (disable_fifo) {
		ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
		if (ret) {
			nvhost_err(dev_from_gk20a(g),
				   "failed to disable gr engine activity\n");
			goto clean_up;
		}
	}

	/* Channel gr_ctx buffer is gpu cacheable.
	   Flush and invalidate before cpu update. */
	gk20a_mm_fb_flush(g);
	gk20a_mm_l2_flush(g, true);

	mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_v(), 0,
		 ch_ctx->zcull_ctx.ctx_sw_mode);

	mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_v(), 0, va);

	if (disable_fifo) {
		ret = gk20a_fifo_enable_engine_activity(g, gr_info);
		if (ret) {
			nvhost_err(dev_from_gk20a(g),
				   "failed to enable gr engine activity\n");
			goto clean_up;
		}
	}
	gk20a_mm_l2_invalidate(g);

clean_up:
	nvhost_memmgr_munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);

	return ret;
}
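
/*
 * The beta (attribute) and alpha circular buffers are carved out of one
 * shared chunk; each PPC gets a slice sized by its TPC count. The
 * function below computes per-PPC offsets into the chunk and patches
 * them into the context, along with the PD output limit derived from
 * the alpha CB default size.
 */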
static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
					     struct channel_gk20a *c, u32 patch)
{
	struct gr_gk20a *gr = &g->gr;
	u32 attrib_offset_in_chunk = 0;
	u32 alpha_offset_in_chunk = 0;
	u32 pd_ab_max_output;
	u32 gpc_index, ppc_index;
	u32 temp;
	u32 cbm_cfg_size1, cbm_cfg_size2;

	gr_gk20a_ctx_patch_write(g, c, gr_ds_tga_constraintlogic_r(),
		gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
		gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
		patch);

	pd_ab_max_output = (gr->alpha_cb_default_size *
		gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);

	alpha_offset_in_chunk = attrib_offset_in_chunk +
		gr->tpc_count * gr->attrib_cb_size;

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		temp = proj_gpc_stride_v() * gpc_index;
		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
		     ppc_index++) {
			cbm_cfg_size1 = gr->attrib_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];
			cbm_cfg_size2 = gr->alpha_cb_default_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, c,
				gr_gpc0_ppc0_cbm_cfg_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) |
				gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) |
				gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch);

			attrib_offset_in_chunk += gr->attrib_cb_size *
				gr->pes_tpc_count[ppc_index][gpc_index];

			gr_gk20a_ctx_patch_write(g, c,
				gr_gpc0_ppc0_cbm_cfg2_r() + temp +
				proj_ppc_in_gpc_stride_v() * ppc_index,
				gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) |
				gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch);

			alpha_offset_in_chunk += gr->alpha_cb_size *
				gr->pes_tpc_count[ppc_index][gpc_index];
		}
	}

	return 0;
}
static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
					      struct channel_gk20a *c, u32 patch)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	u64 addr;
	u32 size;
	u32 data;

	/* global pagepool buffer */
	addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
		gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
	       (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
		(32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));

	size = gr->global_ctx_buffer[PAGEPOOL].size /
		gr_scc_pagepool_total_pages_byte_granularity_v();

	if (size == gr_scc_pagepool_total_pages_hwmax_value_v())
		size = gr_scc_pagepool_total_pages_hwmax_v();

	nvhost_dbg_info("pagepool buffer addr : 0x%016llx, size : %d",
			addr, size);

	gr_gk20a_ctx_patch_write(g, c, gr_scc_pagepool_base_r(),
		gr_scc_pagepool_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, c, gr_scc_pagepool_r(),
		gr_scc_pagepool_total_pages_f(size) |
		gr_scc_pagepool_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gcc_pagepool_base_r(),
		gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gcc_pagepool_r(),
		gr_gpcs_gcc_pagepool_total_pages_f(size), patch);

	gr_gk20a_ctx_patch_write(g, c, gr_pd_pagepool_r(),
		gr_pd_pagepool_total_pages_f(size) |
		gr_pd_pagepool_valid_true_f(), patch);

	/* global bundle cb */
	addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
		gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
	       (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
		(32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));

	size = gr->bundle_cb_default_size;

	nvhost_dbg_info("bundle cb addr : 0x%016llx, size : %d",
			addr, size);

	gr_gk20a_ctx_patch_write(g, c, gr_scc_bundle_cb_base_r(),
		gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, c, gr_scc_bundle_cb_size_r(),
		gr_scc_bundle_cb_size_div_256b_f(size) |
		gr_scc_bundle_cb_size_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, c, gr_gpcs_setup_bundle_cb_base_r(),
		gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr), patch);

	gr_gk20a_ctx_patch_write(g, c, gr_gpcs_setup_bundle_cb_size_r(),
		gr_gpcs_setup_bundle_cb_size_div_256b_f(size) |
		gr_gpcs_setup_bundle_cb_size_valid_true_f(), patch);

	/* data for state_limit */
	data = (gr->bundle_cb_default_size *
		gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
		gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();

	data = min_t(u32, data, gr->min_gpm_fifo_depth);

	nvhost_dbg_info("bundle cb token limit : %d, state limit : %d",
			gr->bundle_cb_token_limit, data);

	gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg2_r(),
		gr_pd_ab_dist_cfg2_token_limit_f(gr->bundle_cb_token_limit) |
		gr_pd_ab_dist_cfg2_state_limit_f(data), patch);

	/* global attrib cb */
	addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
		gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
	       (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
		(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));

	nvhost_dbg_info("attrib cb addr : 0x%016llx", addr);

	gr_gk20a_ctx_patch_write(g, c, gr_gpcs_setup_attrib_cb_base_r(),
		gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
		gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);

	gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
		gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
		gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);

	return 0;
}
static int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, u32 patch)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpm_pd_cfg;
	u32 pd_ab_dist_cfg0;
	u32 ds_debug;
	u32 mpc_vtg_debug;
	u32 pe_vaf;
	u32 pe_vsc_vpc;

	gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r());
	pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
	ds_debug = gk20a_readl(g, gr_ds_debug_r());
	mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());

	if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
		pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
		pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());

		gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f() | gpm_pd_cfg;
		pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf;
		pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | pe_vsc_vpc;
		pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | pd_ab_dist_cfg0;
		ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;

		gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
		gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
		gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
		gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
		gr_gk20a_ctx_patch_write(g, c, gr_ds_debug_r(), ds_debug, patch);
		gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
	} else {
		gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
		pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
		ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;

		gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
		gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
		gr_gk20a_ctx_patch_write(g, c, gr_ds_debug_r(), ds_debug, patch);
		gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
	}

	return 0;
}
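
/*
 * The ROP/raster mapping tables translate screen tiles to GPCs. The
 * same six map words are programmed into three units (crstr, wwdx,
 * rstr2d); wwdx additionally needs the table size normalized to a
 * power-of-two range, so a shift and the mod-2^n coefficients are
 * derived from the TPC count.
 */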
static int gr_gk20a_setup_rop_mapping(struct gk20a *g,
				      struct gr_gk20a *gr)
{
	u32 norm_entries, norm_shift;
	u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
	u32 map0, map1, map2, map3, map4, map5;

	if (!gr->map_tiles)
		return -1;

	gk20a_writel(g, gr_crstr_map_table_cfg_r(),
		     gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
		     gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));

	map0 = gr_crstr_gpc_map0_tile0_f(gr->map_tiles[0]) |
		gr_crstr_gpc_map0_tile1_f(gr->map_tiles[1]) |
		gr_crstr_gpc_map0_tile2_f(gr->map_tiles[2]) |
		gr_crstr_gpc_map0_tile3_f(gr->map_tiles[3]) |
		gr_crstr_gpc_map0_tile4_f(gr->map_tiles[4]) |
		gr_crstr_gpc_map0_tile5_f(gr->map_tiles[5]);

	map1 = gr_crstr_gpc_map1_tile6_f(gr->map_tiles[6]) |
		gr_crstr_gpc_map1_tile7_f(gr->map_tiles[7]) |
		gr_crstr_gpc_map1_tile8_f(gr->map_tiles[8]) |
		gr_crstr_gpc_map1_tile9_f(gr->map_tiles[9]) |
		gr_crstr_gpc_map1_tile10_f(gr->map_tiles[10]) |
		gr_crstr_gpc_map1_tile11_f(gr->map_tiles[11]);

	map2 = gr_crstr_gpc_map2_tile12_f(gr->map_tiles[12]) |
		gr_crstr_gpc_map2_tile13_f(gr->map_tiles[13]) |
		gr_crstr_gpc_map2_tile14_f(gr->map_tiles[14]) |
		gr_crstr_gpc_map2_tile15_f(gr->map_tiles[15]) |
		gr_crstr_gpc_map2_tile16_f(gr->map_tiles[16]) |
		gr_crstr_gpc_map2_tile17_f(gr->map_tiles[17]);

	map3 = gr_crstr_gpc_map3_tile18_f(gr->map_tiles[18]) |
		gr_crstr_gpc_map3_tile19_f(gr->map_tiles[19]) |
		gr_crstr_gpc_map3_tile20_f(gr->map_tiles[20]) |
		gr_crstr_gpc_map3_tile21_f(gr->map_tiles[21]) |
		gr_crstr_gpc_map3_tile22_f(gr->map_tiles[22]) |
		gr_crstr_gpc_map3_tile23_f(gr->map_tiles[23]);

	map4 = gr_crstr_gpc_map4_tile24_f(gr->map_tiles[24]) |
		gr_crstr_gpc_map4_tile25_f(gr->map_tiles[25]) |
		gr_crstr_gpc_map4_tile26_f(gr->map_tiles[26]) |
		gr_crstr_gpc_map4_tile27_f(gr->map_tiles[27]) |
		gr_crstr_gpc_map4_tile28_f(gr->map_tiles[28]) |
		gr_crstr_gpc_map4_tile29_f(gr->map_tiles[29]);

	map5 = gr_crstr_gpc_map5_tile30_f(gr->map_tiles[30]) |
		gr_crstr_gpc_map5_tile31_f(gr->map_tiles[31]) |
		gr_crstr_gpc_map5_tile32_f(0) |
		gr_crstr_gpc_map5_tile33_f(0) |
		gr_crstr_gpc_map5_tile34_f(0) |
		gr_crstr_gpc_map5_tile35_f(0);

	gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
	gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
	gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
	gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
	gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
	gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);

	switch (gr->tpc_count) {
	/* reconstructed (assumed): the per-TPC-count cases are elided in
	 * the source; each case picks a norm_shift for the table below */
	case 1:
		norm_shift = 4;
		break;
	case 2:
	case 3:
		norm_shift = 3;
		break;
	case 4:
	case 5:
	case 6:
	case 7:
		norm_shift = 2;
		break;
	case 8:
	case 9:
	case 10:
	case 11:
	case 12:
	case 13:
	case 14:
	case 15:
		norm_shift = 1;
		break;
	default:
		norm_shift = 0;
		break;
	}

	norm_entries = gr->tpc_count << norm_shift;
	coeff5_mod = (1 << 5) % norm_entries;
	coeff6_mod = (1 << 6) % norm_entries;
	coeff7_mod = (1 << 7) % norm_entries;
	coeff8_mod = (1 << 8) % norm_entries;
	coeff9_mod = (1 << 9) % norm_entries;
	coeff10_mod = (1 << 10) % norm_entries;
	coeff11_mod = (1 << 11) % norm_entries;

	gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
		     gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
		     gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
		     gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
		     gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
		     gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));

	gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
		     gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
		     gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
		     gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
		     gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
		     gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
		     gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));

	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
	gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);

	gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
		     gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
		     gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));

	gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
	gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
	gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
	gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
	gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
	gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);

	return 0;
}
static inline u32 count_bits(u32 mask)
{
	u32 temp = mask;
	u32 count;
	for (count = 0; temp != 0; count++)
		temp &= temp - 1;	/* clear the lowest set bit */

	return count;
}

static inline u32 clear_count_bits(u32 num, u32 clear_count)
{
	u32 count = clear_count;
	for (; (num != 0) && (count != 0); count--)
		num &= num - 1;		/* clear the lowest set bit */

	return num;
}
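
/*
 * Alpha/beta ratio tables: each of the 32 rows describes one way to
 * split the TPCs between the two workload classes. For row r, roughly
 * tpc_count * r / 32 TPCs land on the alpha side and the remainder on
 * beta; the bit-twiddling helpers above carve the per-PES TPC masks
 * accordingly, and the results are packed four GPCs per register.
 */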
static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
					    struct gr_gk20a *gr)
{
	u32 table_index_bits = 5;
	u32 rows = (1 << table_index_bits);
	u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows;
	u32 row;
	u32 index;
	u32 gpc_index;
	u32 gpcs_per_reg = 4;
	u32 pes_index;
	u32 tpc_count_pes;
	u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
	bool assign_alpha;
	u32 alpha_target, beta_target;
	u32 alpha_bits, beta_bits;
	u32 alpha_mask, beta_mask, partial_mask;
	u32 reg_offset;
	u32 map_alpha[gr_pd_alpha_ratio_table__size_1_v()];
	u32 map_beta[gr_pd_alpha_ratio_table__size_1_v()];
	u32 map_reg_used[gr_pd_alpha_ratio_table__size_1_v()];

	memset(map_alpha, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
	memset(map_beta, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
	memset(map_reg_used, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));

	for (row = 0; row < rows; ++row) {
		alpha_target = max_t(u32, gr->tpc_count * row / rows, 1);
		beta_target = gr->tpc_count - alpha_target;

		assign_alpha = (alpha_target < beta_target);

		for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
			reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg);
			alpha_mask = beta_mask = 0;

			for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) {
				tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index];

				if (assign_alpha) {
					alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes;
					beta_bits = tpc_count_pes - alpha_bits;
				} else {
					beta_bits = (beta_target == 0) ? 0 : tpc_count_pes;
					alpha_bits = tpc_count_pes - beta_bits;
				}

				partial_mask = gr->pes_tpc_mask[pes_index][gpc_index];
				partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits);
				alpha_mask |= partial_mask;

				partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask;
				beta_mask |= partial_mask;

				alpha_target -= min(alpha_bits, alpha_target);
				beta_target -= min(beta_bits, beta_target);

				if ((alpha_bits > 0) || (beta_bits > 0))
					assign_alpha = !assign_alpha;
			}

			switch (gpc_index % gpcs_per_reg) {
			case 0:
				map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask);
				map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask);
				break;
			case 1:
				map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask);
				map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask);
				break;
			case 2:
				map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask);
				map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask);
				break;
			case 3:
				map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask);
				map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask);
				break;
			}
			map_reg_used[reg_offset] = true;
		}
	}

	for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) {
		if (map_reg_used[index]) {
			gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]);
			gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]);
		}
	}

	return 0;
}
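
/*
 * Floorsweeping: assign a linear SM id to every live TPC, then mirror
 * the per-GPC TPC counts into the PD and DS units so work distribution
 * matches the fused configuration. The skip table and cwd_fs writes
 * propagate the same information to the distributor.
 */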
static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	u32 tpc_index, gpc_index;
	u32 tpc_offset, gpc_offset;
	u32 sm_id = 0, gpc_id = 0;
	u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
	u32 tpc_per_gpc;
	u32 max_ways_evict = INVALID_MAX_WAYS;

	for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) {
		for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
			gpc_offset = proj_gpc_stride_v() * gpc_index;
			if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
				tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;

				gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
					     gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
				gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset,
					     gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id));
				gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset,
					     gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
				gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
					     gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));

				sm_id_to_gpc_id[sm_id] = gpc_index;
				sm_id++;
			}

			gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset,
				     gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
			gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset,
				     gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
		}
	}

	for (tpc_index = 0, gpc_id = 0;
	     tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
	     tpc_index++, gpc_id += 8) {

		if (gpc_id >= gr->gpc_count)
			continue;

		tpc_per_gpc =
			gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) |
			gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) |
			gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) |
			gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) |
			gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) |
			gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) |
			gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) |
			gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]);

		gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
		gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
	}

	/* grSetupPDMapping stubbed for gk20a */
	gr_gk20a_setup_rop_mapping(g, gr);
	gr_gk20a_setup_alpha_beta_tables(g, gr);

	if (gr->num_fbps == 1)
		max_ways_evict = 9;	/* assumed value; the original line is elided */

	if (max_ways_evict != INVALID_MAX_WAYS)
		gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_r(),
			     ((gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_r()) &
			       ~(ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(~0))) |
			      ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(max_ways_evict)));

	for (gpc_index = 0;
	     gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
	     gpc_index += 4) {
		gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
			     gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) |
			     gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) |
			     gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) |
			     gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
	}

	gk20a_writel(g, gr_cwd_fs_r(),
		     gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
		     gr_cwd_fs_num_tpcs_f(gr->tpc_count));

	gk20a_writel(g, gr_bes_zrop_settings_r(),
		     gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps));
	gk20a_writel(g, gr_bes_crop_settings_r(),
		     gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps));

	return 0;
}
static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
{
	struct gk20a *g = c->g;
	int ret;
	u32 inst_base_ptr =
		u64_lo32(sg_phys(c->inst_block.mem.sgt->sgl)
		>> ram_in_base_shift_v());

	ret = gr_gk20a_submit_fecs_method(g, 0, 0, 3,
			gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
			gr_fecs_current_ctx_target_vid_mem_f() |
			gr_fecs_current_ctx_valid_f(1), save_type, 0,
			GR_IS_UCODE_OP_AND, 1, GR_IS_UCODE_OP_AND, 2);
	if (ret)
		nvhost_err(dev_from_gk20a(g), "save context image failed");

	return ret;
}
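
/*
 * Golden context image: the very first channel's freshly-initialized gr
 * context is saved once, both into the GOLDEN_CTX global buffer and
 * into a kernel-memory copy (local_golden_image). Every later channel
 * gets its gr_ctx seeded from that copy instead of re-running the full
 * hardware initialization.
 */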
/* init global golden image from a fresh gr_ctx in channel ctx.
   save a copy in local_golden_image in ctx_vars */
static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
					  struct channel_gk20a *c)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
	u32 ctx_header_words;
	u32 i;
	u32 data;
	void *ctx_ptr = NULL;
	void *gold_ptr = NULL;
	int err = 0;

	/* golden ctx is global to all channels. Although only the first
	   channel initializes golden image, driver needs to prevent multiple
	   channels from initializing golden ctx at the same time */
	mutex_lock(&gr->ctx_mutex);

	if (gr->ctx_vars.golden_image_initialized)
		goto clean_up;

	err = gr_gk20a_fecs_ctx_bind_channel(g, c);
	if (err)
		goto clean_up;

	err = gr_gk20a_commit_global_ctx_buffers(g, c, 0);
	if (err)
		goto clean_up;

	gold_ptr = nvhost_memmgr_mmap(gr->global_ctx_buffer[GOLDEN_CTX].ref);
	if (!gold_ptr)
		goto clean_up;

	ctx_ptr = nvhost_memmgr_mmap(ch_ctx->gr_ctx.mem.ref);
	if (!ctx_ptr)
		goto clean_up;

	ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
	ctx_header_words >>= 2;

	/* Channel gr_ctx buffer is gpu cacheable.
	   Flush before cpu read. */
	gk20a_mm_fb_flush(g);
	gk20a_mm_l2_flush(g, false);

	for (i = 0; i < ctx_header_words; i++) {
		data = mem_rd32(ctx_ptr, i);
		mem_wr32(gold_ptr, i, data);
	}

	mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_v(), 0,
		 ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());

	mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_v(), 0, 0);

	gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);

	gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_f());

	if (gr->ctx_vars.local_golden_image == NULL) {

		gr->ctx_vars.local_golden_image =
			kzalloc(gr->ctx_vars.golden_image_size, GFP_KERNEL);

		if (gr->ctx_vars.local_golden_image == NULL) {
			err = -ENOMEM;
			goto clean_up;
		}

		for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
			gr->ctx_vars.local_golden_image[i] =
				mem_rd32(gold_ptr, i);
	}

	gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);

	gr->ctx_vars.golden_image_initialized = true;

	gk20a_mm_l2_invalidate(g);

	gk20a_writel(g, gr_fecs_current_ctx_r(),
		     gr_fecs_current_ctx_valid_false_f());

clean_up:
	if (err)
		nvhost_dbg(dbg_fn | dbg_err, "fail");
	else
		nvhost_dbg_fn("done");

	if (gold_ptr)
		nvhost_memmgr_munmap(gr->global_ctx_buffer[GOLDEN_CTX].ref,
				     gold_ptr);
	if (ctx_ptr)
		nvhost_memmgr_munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);

	mutex_unlock(&gr->ctx_mutex);
	return err;
}
/* load saved fresh copy of golden image into channel gr_ctx */
static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
					  struct channel_gk20a *c)
{
	struct gr_gk20a *gr = &g->gr;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	u32 virt_addr_lo;
	u32 virt_addr_hi;
	u32 i;
	int ret = 0;
	void *ctx_ptr = NULL;

	if (gr->ctx_vars.local_golden_image == NULL)
		return -1;

	/* Channel gr_ctx buffer is gpu cacheable.
	   Flush and invalidate before cpu update. */
	gk20a_mm_fb_flush(g);
	gk20a_mm_l2_flush(g, true);

	ctx_ptr = nvhost_memmgr_mmap(ch_ctx->gr_ctx.mem.ref);
	if (!ctx_ptr)
		return -ENOMEM;

	for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
		mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);

	mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_v(), 0, 0);
	mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_v(), 0, 0);

	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);

	mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_v(), 0,
		 ch_ctx->patch_ctx.data_count);
	mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_v(), 0,
		 virt_addr_lo);
	mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_v(), 0,
		 virt_addr_hi);

	/* no user for client managed performance counter ctx */
	ch_ctx->pm_ctx.ctx_sw_mode =
		ctxsw_prog_main_image_pm_mode_no_ctxsw_v();

	mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_v(), 0,
		 ch_ctx->pm_ctx.ctx_sw_mode);
	mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_v(), 0, 0);

	nvhost_memmgr_munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);

	gk20a_mm_l2_invalidate(g);

	if (tegra_platform_is_linsim()) {
		u32 inst_base_ptr =
			u64_lo32(sg_phys(c->inst_block.mem.sgt->sgl)
			>> ram_in_base_shift_v());

		ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0,
				gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
				gr_fecs_current_ctx_target_vid_mem_f() |
				gr_fecs_current_ctx_valid_f(1),
				gr_fecs_method_push_adr_restore_golden_f(), 0,
				GR_IS_UCODE_OP_EQUAL, gr_fecs_ctxsw_mailbox_value_pass_v(),
				GR_IS_UCODE_OP_SKIP, 0);
		if (ret)
			nvhost_err(dev_from_gk20a(g),
				   "restore context image failed");
	}

	return ret;
}
static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
{
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
		     gr_fecs_ctxsw_mailbox_clear_value_f(~0));

	gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
	gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));

	gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
	gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));

	nvhost_dbg_fn("done");
}
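
/*
 * Boot sequence: seed the mailboxes (simulation only), load DMEM and
 * IMEM for both falcons, release their CPUs, then wait for the ucode
 * init-complete handshake in mailbox 0 before arming the ucode
 * watchdog.
 */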
static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr)
{
	int ret;

	if (tegra_platform_is_linsim()) {
		gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
			     gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
		gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
			     gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
	}

	gr_gk20a_load_falcon_dmem(g);
	gr_gk20a_load_falcon_imem(g);

	gr_gk20a_start_falcon_ucode(g);

	ret = gr_gk20a_ctx_wait_ucode(g, 0, 0,
				      GR_IS_UCODE_OP_EQUAL,
				      eUcodeHandshakeInitComplete,
				      GR_IS_UCODE_OP_SKIP, 0);
	if (ret) {
		nvhost_err(dev_from_gk20a(g), "falcon ucode init timeout");
		return ret;
	}

	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
	gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
	gk20a_writel(g, gr_fecs_method_push_r(),
		     gr_fecs_method_push_adr_set_watchdog_timeout_f());

	nvhost_dbg_fn("done");
	return 0;
}
static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
{
	u32 golden_ctx_image_size = 0;
	u32 zcull_ctx_image_size = 0;
	u32 pm_ctx_image_size = 0;
	int ret;

	ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 0,
			gr_fecs_method_push_adr_discover_image_size_f(),
			&golden_ctx_image_size,
			GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
	if (ret) {
		nvhost_err(dev_from_gk20a(g),
			   "query golden image size failed");
		return ret;
	}

	ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 0,
			gr_fecs_method_push_adr_discover_zcull_image_size_f(),
			&zcull_ctx_image_size,
			GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
	if (ret) {
		nvhost_err(dev_from_gk20a(g),
			   "query zcull ctx image size failed");
		return ret;
	}

	ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 0,
			gr_fecs_method_push_adr_discover_pm_image_size_f(),
			&pm_ctx_image_size,
			GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
	if (ret) {
		nvhost_err(dev_from_gk20a(g),
			   "query pm ctx image size failed");
		return ret;
	}

	if (!g->gr.ctx_vars.golden_image_size &&
	    !g->gr.ctx_vars.zcull_ctxsw_image_size) {
		g->gr.ctx_vars.golden_image_size = golden_ctx_image_size;
		g->gr.ctx_vars.zcull_ctxsw_image_size = zcull_ctx_image_size;
	} else {
		/* hw is different after railgating? */
		BUG_ON(g->gr.ctx_vars.golden_image_size != golden_ctx_image_size);
		BUG_ON(g->gr.ctx_vars.zcull_ctxsw_image_size != zcull_ctx_image_size);
	}

	nvhost_dbg_fn("done");
	return 0;
}
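
/*
 * Global context buffers are shared across all channels: the circular
 * (bundle) buffer, the page pool, the attribute buffer and the golden
 * image, each with a VPR-carveout twin where secure allocation applies.
 * They are allocated once here and mapped per-channel further below.
 */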
static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	struct mem_mgr *memmgr = mem_mgr_from_g(g);
	struct mem_handle *mem;
	u32 i, attr_buffer_size;

	u32 cb_buffer_size = gr_scc_bundle_cb_size_div_256b__prod_v() *
		gr_scc_bundle_cb_size_div_256b_byte_granularity_v();

	u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
		gr_scc_pagepool_total_pages_byte_granularity_v();

	u32 attr_cb_default_size = gr_gpc0_ppc0_cbm_cfg_size_default_v();
	u32 alpha_cb_default_size = gr_gpc0_ppc0_cbm_cfg2_size_default_v();

	u32 attr_cb_size =
		attr_cb_default_size + (attr_cb_default_size >> 1);
	u32 alpha_cb_size =
		alpha_cb_default_size + (alpha_cb_default_size >> 1);

	u32 num_tpcs_per_pes = proj_scal_litter_num_tpcs_per_pes_v();
	u32 attr_max_size_per_tpc =
		gr_gpc0_ppc0_cbm_cfg_size_v(~0) / num_tpcs_per_pes;
	u32 alpha_max_size_per_tpc =
		gr_gpc0_ppc0_cbm_cfg2_size_v(~0) / num_tpcs_per_pes;

	attr_cb_size =
		(attr_cb_size > attr_max_size_per_tpc) ?
			attr_max_size_per_tpc : attr_cb_size;
	attr_cb_default_size =
		(attr_cb_default_size > attr_cb_size) ?
			attr_cb_size : attr_cb_default_size;
	alpha_cb_size =
		(alpha_cb_size > alpha_max_size_per_tpc) ?
			alpha_max_size_per_tpc : alpha_cb_size;
	alpha_cb_default_size =
		(alpha_cb_default_size > alpha_cb_size) ?
			alpha_cb_size : alpha_cb_default_size;

	attr_buffer_size =
		(gr_gpc0_ppc0_cbm_cfg_size_granularity_v() * alpha_cb_size +
		 gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() * alpha_cb_size) *
		gr->tpc_count;

	nvhost_dbg_info("cb_buffer_size : %d", cb_buffer_size);

	mem = nvhost_memmgr_alloc(memmgr, cb_buffer_size,
				  DEFAULT_ALLOC_ALIGNMENT,
				  DEFAULT_ALLOC_FLAGS,
				  0);
	if (IS_ERR(mem))
		goto clean_up;

	gr->global_ctx_buffer[CIRCULAR].ref = mem;
	gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size;

	mem = nvhost_memmgr_alloc(memmgr, cb_buffer_size,
				  DEFAULT_ALLOC_ALIGNMENT,
				  DEFAULT_ALLOC_FLAGS,
				  NVMAP_HEAP_CARVEOUT_VPR);
	if (!IS_ERR(mem)) {
		gr->global_ctx_buffer[CIRCULAR_VPR].ref = mem;
		gr->global_ctx_buffer[CIRCULAR_VPR].size = cb_buffer_size;
	}

	nvhost_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);

	mem = nvhost_memmgr_alloc(memmgr, pagepool_buffer_size,
				  DEFAULT_ALLOC_ALIGNMENT,
				  DEFAULT_ALLOC_FLAGS,
				  0);
	if (IS_ERR(mem))
		goto clean_up;

	gr->global_ctx_buffer[PAGEPOOL].ref = mem;
	gr->global_ctx_buffer[PAGEPOOL].size = pagepool_buffer_size;

	mem = nvhost_memmgr_alloc(memmgr, pagepool_buffer_size,
				  DEFAULT_ALLOC_ALIGNMENT,
				  DEFAULT_ALLOC_FLAGS,
				  NVMAP_HEAP_CARVEOUT_VPR);
	if (!IS_ERR(mem)) {
		gr->global_ctx_buffer[PAGEPOOL_VPR].ref = mem;
		gr->global_ctx_buffer[PAGEPOOL_VPR].size = pagepool_buffer_size;
	}

	nvhost_dbg_info("attr_buffer_size : %d", attr_buffer_size);

	mem = nvhost_memmgr_alloc(memmgr, attr_buffer_size,
				  DEFAULT_ALLOC_ALIGNMENT,
				  DEFAULT_ALLOC_FLAGS,
				  0);
	if (IS_ERR(mem))
		goto clean_up;

	gr->global_ctx_buffer[ATTRIBUTE].ref = mem;
	gr->global_ctx_buffer[ATTRIBUTE].size = attr_buffer_size;

	mem = nvhost_memmgr_alloc(memmgr, attr_buffer_size,
				  DEFAULT_ALLOC_ALIGNMENT,
				  DEFAULT_ALLOC_FLAGS,
				  NVMAP_HEAP_CARVEOUT_VPR);
	if (!IS_ERR(mem)) {
		gr->global_ctx_buffer[ATTRIBUTE_VPR].ref = mem;
		gr->global_ctx_buffer[ATTRIBUTE_VPR].size = attr_buffer_size;
	}

	nvhost_dbg_info("golden_image_size : %d",
			gr->ctx_vars.golden_image_size);

	mem = nvhost_memmgr_alloc(memmgr, gr->ctx_vars.golden_image_size,
				  DEFAULT_ALLOC_ALIGNMENT,
				  DEFAULT_ALLOC_FLAGS,
				  0);
	if (IS_ERR(mem))
		goto clean_up;

	gr->global_ctx_buffer[GOLDEN_CTX].ref = mem;
	gr->global_ctx_buffer[GOLDEN_CTX].size =
		gr->ctx_vars.golden_image_size;

	nvhost_dbg_fn("done");
	return 0;

clean_up:
	nvhost_dbg(dbg_fn | dbg_err, "fail");
	for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
		if (gr->global_ctx_buffer[i].ref) {
			nvhost_memmgr_put(memmgr,
					  gr->global_ctx_buffer[i].ref);
			memset(&gr->global_ctx_buffer[i],
			       0, sizeof(struct mem_desc));
		}
	}
	return -ENOMEM;
}
static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	struct mem_mgr *memmgr = mem_mgr_from_g(g);
	u32 i;

	for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
		nvhost_memmgr_put(memmgr, gr->global_ctx_buffer[i].ref);
		memset(&gr->global_ctx_buffer[i], 0, sizeof(struct mem_desc));
	}

	nvhost_dbg_fn("done");
}
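
/*
 * Per-channel mapping: a channel maps either the normal or the VPR copy
 * of each global buffer into its own GPU VA space, depending on whether
 * the channel runs in VPR. The resulting VAs are cached in
 * ch_ctx.global_ctx_buffer_va and later patched into the context image.
 */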
static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
					   struct channel_gk20a *c)
{
	struct vm_gk20a *ch_vm = c->vm;
	struct mem_mgr *memmgr = mem_mgr_from_g(g);
	struct mem_handle *handle_ref;
	u32 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
	struct gr_gk20a *gr = &g->gr;
	u64 gpu_va;
	u32 i;

	/* Circular Buffer */
	if (!c->vpr || (gr->global_ctx_buffer[CIRCULAR_VPR].ref == NULL))
		handle_ref = gr->global_ctx_buffer[CIRCULAR].ref;
	else
		handle_ref = gr->global_ctx_buffer[CIRCULAR_VPR].ref;

	gpu_va = ch_vm->map(ch_vm, memmgr, handle_ref,
			    /*offset_align, flags, kind*/
			    0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0,
			    NULL);
	if (!gpu_va)
		goto clean_up;
	g_bfr_va[CIRCULAR_VA] = gpu_va;

	/* Attribute Buffer */
	if (!c->vpr || (gr->global_ctx_buffer[ATTRIBUTE_VPR].ref == NULL))
		handle_ref = gr->global_ctx_buffer[ATTRIBUTE].ref;
	else
		handle_ref = gr->global_ctx_buffer[ATTRIBUTE_VPR].ref;

	gpu_va = ch_vm->map(ch_vm, memmgr, handle_ref,
			    /*offset_align, flags, kind*/
			    0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0,
			    NULL);
	if (!gpu_va)
		goto clean_up;
	g_bfr_va[ATTRIBUTE_VA] = gpu_va;

	/* Page Pool */
	if (!c->vpr || (gr->global_ctx_buffer[PAGEPOOL_VPR].ref == NULL))
		handle_ref = gr->global_ctx_buffer[PAGEPOOL].ref;
	else
		handle_ref = gr->global_ctx_buffer[PAGEPOOL_VPR].ref;

	gpu_va = ch_vm->map(ch_vm, memmgr, handle_ref,
			    /*offset_align, flags, kind*/
			    0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0,
			    NULL);
	if (!gpu_va)
		goto clean_up;
	g_bfr_va[PAGEPOOL_VA] = gpu_va;

	/* Golden Image */
	gpu_va = ch_vm->map(ch_vm, memmgr,
			    gr->global_ctx_buffer[GOLDEN_CTX].ref,
			    /*offset_align, flags, kind*/
			    0, 0, 0, NULL);
	if (!gpu_va)
		goto clean_up;
	g_bfr_va[GOLDEN_CTX_VA] = gpu_va;

	c->ch_ctx.global_ctx_buffer_mapped = true;
	return 0;

clean_up:
	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
		if (g_bfr_va[i]) {
			ch_vm->unmap(ch_vm, g_bfr_va[i]);
			g_bfr_va[i] = 0;
		}
	}
	return -ENOMEM;
}
static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
{
	struct vm_gk20a *ch_vm = c->vm;
	u32 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
	u32 i;

	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
		if (g_bfr_va[i]) {
			ch_vm->unmap(ch_vm, g_bfr_va[i]);
			g_bfr_va[i] = 0;
		}
	}
	c->ch_ctx.global_ctx_buffer_mapped = false;
}
static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
					 struct channel_gk20a *c)
{
	struct gr_gk20a *gr = &g->gr;
	struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx;
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
	struct vm_gk20a *ch_vm = c->vm;

	if (gr->ctx_vars.buffer_size == 0)
		return 0;

	/* alloc channel gr ctx buffer */
	gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
	gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;

	gr_ctx->mem.ref = nvhost_memmgr_alloc(memmgr,
					      gr->ctx_vars.buffer_total_size,
					      DEFAULT_ALLOC_ALIGNMENT,
					      DEFAULT_ALLOC_FLAGS,
					      0);
	if (IS_ERR(gr_ctx->mem.ref))
		return -ENOMEM;

	gr_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
				    gr_ctx->mem.ref,
				    /*offset_align, flags, kind*/
				    0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0, NULL);
	if (!gr_ctx->gpu_va) {
		nvhost_memmgr_put(memmgr, gr_ctx->mem.ref);
		return -ENOMEM;
	}

	return 0;
}
static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
{
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct mem_mgr *ch_nvmap = gk20a_channel_mem_mgr(c);
	struct vm_gk20a *ch_vm = c->vm;

	ch_vm->unmap(ch_vm, ch_ctx->gr_ctx.gpu_va);
	nvhost_memmgr_put(ch_nvmap, ch_ctx->gr_ctx.mem.ref);
}
static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
					    struct channel_gk20a *c)
{
	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
	struct vm_gk20a *ch_vm = c->vm;

	patch_ctx->mem.ref = nvhost_memmgr_alloc(memmgr, 128 * sizeof(u32),
						 DEFAULT_ALLOC_ALIGNMENT,
						 DEFAULT_ALLOC_FLAGS,
						 0);
	if (IS_ERR(patch_ctx->mem.ref))
		return -ENOMEM;

	patch_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
				       patch_ctx->mem.ref,
				       /*offset_align, flags, kind*/
				       0, 0, 0, NULL);
	if (!patch_ctx->gpu_va)
		goto clean_up;

	nvhost_dbg_fn("done");
	return 0;

clean_up:
	nvhost_dbg(dbg_fn | dbg_err, "fail");
	if (patch_ctx->mem.ref) {
		nvhost_memmgr_put(memmgr, patch_ctx->mem.ref);
		patch_ctx->mem.ref = 0;
	}

	return -ENOMEM;
}
static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c)
{
	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
	struct vm_gk20a *ch_vm = c->vm;

	if (patch_ctx->gpu_va)
		ch_vm->unmap(ch_vm, patch_ctx->gpu_va);
	patch_ctx->gpu_va = 0;
	patch_ctx->data_count = 0;
}
static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
{
	struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
	struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);

	gr_gk20a_unmap_channel_patch_ctx(c);

	if (patch_ctx->mem.ref) {
		nvhost_memmgr_put(memmgr, patch_ctx->mem.ref);
		patch_ctx->mem.ref = 0;
	}
}
void gk20a_free_channel_ctx(struct channel_gk20a *c)
{
	gr_gk20a_unmap_global_ctx_buffers(c);
	gr_gk20a_free_channel_patch_ctx(c);
	gr_gk20a_free_channel_gr_ctx(c);

	/* zcull_ctx, pm_ctx */

	memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));

	c->first_init = false;
}
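
/*
 * gk20a_alloc_obj_ctx is the ioctl-level entry point that stitches the
 * pieces together for a channel's first object: allocate the gr_ctx and
 * patch buffers, map and commit the global buffers, create the golden
 * image if this is the first channel ever, then seed this channel's
 * context from it.
 */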
int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
			struct nvhost_alloc_obj_ctx_args *args)
{
	struct gk20a *g = c->g;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	bool change_to_compute_mode = false;
	int err = 0;

	/* an address space needs to have been bound at this point.*/
	if (!gk20a_channel_as_bound(c)) {
		nvhost_err(dev_from_gk20a(g),
			   "not bound to address space at time"
			   " of grctx allocation");
		return -EINVAL;
	}

	switch (args->class_num) {
	case KEPLER_COMPUTE_A:
		/* tbd: NV2080_CTRL_GPU_COMPUTE_MODE_RULES_EXCLUSIVE_COMPUTE */
		/* tbd: PDB_PROP_GRAPHICS_DISTINCT_3D_AND_COMPUTE_STATE_DEF */
		change_to_compute_mode = true;
		break;
	case KEPLER_C: /* assumed: the 3D class case is elided in the source */
	case KEPLER_DMA_COPY_A:
		break;
	default:
		nvhost_err(dev_from_gk20a(g),
			   "invalid obj class 0x%x", args->class_num);
		err = -EINVAL;
		goto out;
	}

	/* allocate gr ctx buffer */
	if (ch_ctx->gr_ctx.mem.ref == NULL) {
		err = gr_gk20a_alloc_channel_gr_ctx(g, c);
		if (err) {
			nvhost_err(dev_from_gk20a(g),
				   "fail to allocate gr ctx buffer");
			goto out;
		}
	} else {
		/*TBD: needs to be more subtle about which is being allocated
		 * as some are allowed to be allocated along same channel */
		nvhost_err(dev_from_gk20a(g),
			   "too many classes alloc'd on same channel");
		err = -EINVAL;
		goto out;
	}

	/* commit gr ctx buffer */
	err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
	if (err) {
		nvhost_err(dev_from_gk20a(g),
			   "fail to commit gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (ch_ctx->patch_ctx.mem.ref == NULL) {
		err = gr_gk20a_alloc_channel_patch_ctx(g, c);
		if (err) {
			nvhost_err(dev_from_gk20a(g),
				   "fail to allocate patch buffer");
			goto out;
		}
	}

	/* map global buffer to channel gpu_va and commit */
	if (!ch_ctx->global_ctx_buffer_mapped) {
		err = gr_gk20a_map_global_ctx_buffers(g, c);
		if (err) {
			nvhost_err(dev_from_gk20a(g),
				   "fail to map global ctx buffer");
			goto out;
		}
		gr_gk20a_elpg_protected_call(g,
			gr_gk20a_commit_global_ctx_buffers(g, c, 1));
	}

	/* init golden image, ELPG enabled after this is done */
	err = gr_gk20a_init_golden_ctx_image(g, c);
	if (err) {
		nvhost_err(dev_from_gk20a(g),
			   "fail to init golden ctx image");
		goto out;
	}

	/* load golden image */
	if (!c->first_init) {
		err = gr_gk20a_elpg_protected_call(g,
			gr_gk20a_load_golden_ctx_image(g, c));
		if (err) {
			nvhost_err(dev_from_gk20a(g),
				   "fail to load golden ctx image");
			goto out;
		}
		c->first_init = true;
	}
	gk20a_mm_l2_invalidate(g);

	c->num_objects++;

	nvhost_dbg_fn("done");
	return 0;
out:
	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
	   can be reused so no need to release them.
	   2. golden image init and load is a one time thing so if
	   they pass, no need to undo. */
	nvhost_dbg(dbg_fn | dbg_err, "fail");
	return err;
}
int gk20a_free_obj_ctx(struct channel_gk20a *c,
		       struct nvhost_free_obj_ctx_args *args)
{
	unsigned long timeout = gk20a_get_gr_idle_timeout(c->g);

	if (c->num_objects == 0)
		return 0;

	c->num_objects--;

	if (c->num_objects == 0) {
		c->first_init = false;
		gk20a_disable_channel(c, true, /*wait for finish*/
				      timeout);
		gr_gk20a_unmap_channel_patch_ctx(c);
	}

	return 0;
}
static void gk20a_remove_gr_support(struct gr_gk20a *gr)
{
	struct gk20a *g = gr->g;
	struct mem_mgr *memmgr = mem_mgr_from_g(g);

	gr_gk20a_free_global_ctx_buffers(g);

	nvhost_memmgr_free_sg_table(memmgr, gr->mmu_wr_mem.mem.ref,
				    gr->mmu_wr_mem.mem.sgt);
	nvhost_memmgr_free_sg_table(memmgr, gr->mmu_rd_mem.mem.ref,
				    gr->mmu_rd_mem.mem.sgt);
#ifdef CONFIG_TEGRA_IOMMU_SMMU
	if (sg_dma_address(gr->compbit_store.mem.sgt->sgl))
		nvhost_memmgr_smmu_unmap(gr->compbit_store.mem.sgt,
					 gr->compbit_store.mem.size,
					 dev_from_gk20a(g)); /* assumed arg; elided in source */
#endif
	nvhost_memmgr_free_sg_table(memmgr, gr->compbit_store.mem.ref,
				    gr->compbit_store.mem.sgt);
	nvhost_memmgr_put(memmgr, gr->mmu_wr_mem.mem.ref);
	nvhost_memmgr_put(memmgr, gr->mmu_rd_mem.mem.ref);
	nvhost_memmgr_put(memmgr, gr->compbit_store.mem.ref);
	memset(&gr->mmu_wr_mem, 0, sizeof(struct mem_desc));
	memset(&gr->mmu_rd_mem, 0, sizeof(struct mem_desc));
	memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));

	kfree(gr->gpc_tpc_count);
	kfree(gr->gpc_zcb_count);
	kfree(gr->gpc_ppc_count);
	kfree(gr->pes_tpc_count[0]);
	kfree(gr->pes_tpc_count[1]);
	kfree(gr->pes_tpc_mask[0]);
	kfree(gr->pes_tpc_mask[1]);
	kfree(gr->gpc_skip_mask);
	kfree(gr->map_tiles);
	gr->gpc_tpc_count = NULL;
	gr->gpc_zcb_count = NULL;
	gr->gpc_ppc_count = NULL;
	gr->pes_tpc_count[0] = NULL;
	gr->pes_tpc_count[1] = NULL;
	gr->pes_tpc_mask[0] = NULL;
	gr->pes_tpc_mask[1] = NULL;
	gr->gpc_skip_mask = NULL;
	gr->map_tiles = NULL;

	kfree(gr->ctx_vars.ucode.fecs.inst.l);
	kfree(gr->ctx_vars.ucode.fecs.data.l);
	kfree(gr->ctx_vars.ucode.gpccs.inst.l);
	kfree(gr->ctx_vars.ucode.gpccs.data.l);
	kfree(gr->ctx_vars.sw_bundle_init.l);
	kfree(gr->ctx_vars.sw_method_init.l);
	kfree(gr->ctx_vars.sw_ctx_load.l);
	kfree(gr->ctx_vars.sw_non_ctx_load.l);
	kfree(gr->ctx_vars.ctxsw_regs.sys.l);
	kfree(gr->ctx_vars.ctxsw_regs.gpc.l);
	kfree(gr->ctx_vars.ctxsw_regs.tpc.l);
	kfree(gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
	kfree(gr->ctx_vars.ctxsw_regs.ppc.l);
	kfree(gr->ctx_vars.ctxsw_regs.pm_sys.l);
	kfree(gr->ctx_vars.ctxsw_regs.pm_gpc.l);
	kfree(gr->ctx_vars.ctxsw_regs.pm_tpc.l);

	kfree(gr->ctx_vars.local_golden_image);
	gr->ctx_vars.local_golden_image = NULL;

	nvhost_allocator_destroy(&gr->comp_tags);
}
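
/*
 * Config readout: the counts below come from priv ring enumeration and
 * the top-level configuration registers, and feed every sizing decision
 * above (CB sizes, ROP maps, alpha/beta tables). pes_tpc_mask records
 * which TPCs behind each PES survived floorsweeping; the skip masks
 * derived from it mark a TPC to skip when the PES loads are unbalanced.
 */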
2025 static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
2027 u32 gpc_index, pes_index;
2030 u32 pes_heavy_index;
2031 u32 gpc_new_skip_mask;
2034 tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
2035 gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
2037 tmp = gk20a_readl(g, top_num_gpcs_r());
2038 gr->max_gpc_count = top_num_gpcs_value_v(tmp);
2040 tmp = gk20a_readl(g, top_num_fbps_r());
2041 gr->max_fbps_count = top_num_fbps_value_v(tmp);
2043 tmp = gk20a_readl(g, top_tpc_per_gpc_r());
2044 gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
2046 gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
2048 tmp = gk20a_readl(g, top_num_fbps_r());
2049 gr->sys_count = top_num_fbps_value_v(tmp);
2051 tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
2052 gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
2054 gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
2055 gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v();
2057 if (!gr->gpc_count) {
2058 nvhost_err(dev_from_gk20a(g), "gpc_count==0!");
2062 gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2063 gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2064 gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2065 gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2066 gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2067 gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2068 gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2069 gr->gpc_skip_mask =
2070 kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
2071 GFP_KERNEL);
2073 if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count ||
2074 !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] ||
2075 !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask)
2079 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2080 tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r());
2082 gr->gpc_tpc_count[gpc_index] =
2083 gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
2084 gr->tpc_count += gr->gpc_tpc_count[gpc_index];
2086 gr->gpc_zcb_count[gpc_index] =
2087 gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
2088 gr->zcb_count += gr->gpc_zcb_count[gpc_index];
2090 gr->gpc_ppc_count[gpc_index] = gr->pe_count_per_gpc;
2091 gr->ppc_count += gr->gpc_ppc_count[gpc_index];
2092 for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
2094 tmp = gk20a_readl(g,
2095 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
2096 gpc_index * proj_gpc_stride_v());
2098 pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
2099 pes_tpc_count = count_bits(pes_tpc_mask);
2101 gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
2102 gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
2105 gpc_new_skip_mask = 0;
2106 if (gr->pes_tpc_count[0][gpc_index] +
2107 gr->pes_tpc_count[1][gpc_index] == 5) {
2108 pes_heavy_index =
2109 gr->pes_tpc_count[0][gpc_index] >
2110 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
2112 gpc_new_skip_mask =
2113 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
2114 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
2115 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
2117 } else if ((gr->pes_tpc_count[0][gpc_index] +
2118 gr->pes_tpc_count[1][gpc_index] == 4) &&
2119 (gr->pes_tpc_count[0][gpc_index] !=
2120 gr->pes_tpc_count[1][gpc_index])) {
2121 pes_heavy_index =
2122 gr->pes_tpc_count[0][gpc_index] >
2123 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
2125 gpc_new_skip_mask =
2126 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
2127 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
2128 (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
2130 gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
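/*
 * Note on the mask math above: x & (x - 1) clears the lowest set bit of x,
 * so x ^ (x & (x - 1)) isolates that bit. With a pes_tpc_mask of 0b0110,
 * for example, the resulting skip mask is 0b0010: exactly one TPC of the
 * heavier PES ends up marked in gpc_skip_mask.
 */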
2133 nvhost_dbg_info("fbps: %d", gr->num_fbps);
2134 nvhost_dbg_info("max_gpc_count: %d", gr->max_gpc_count);
2135 nvhost_dbg_info("max_fbps_count: %d", gr->max_fbps_count);
2136 nvhost_dbg_info("max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count);
2137 nvhost_dbg_info("max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count);
2138 nvhost_dbg_info("max_tpc_count: %d", gr->max_tpc_count);
2139 nvhost_dbg_info("sys_count: %d", gr->sys_count);
2140 nvhost_dbg_info("gpc_count: %d", gr->gpc_count);
2141 nvhost_dbg_info("pe_count_per_gpc: %d", gr->pe_count_per_gpc);
2142 nvhost_dbg_info("tpc_count: %d", gr->tpc_count);
2143 nvhost_dbg_info("ppc_count: %d", gr->ppc_count);
2145 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2146 nvhost_dbg_info("gpc_tpc_count[%d] : %d",
2147 gpc_index, gr->gpc_tpc_count[gpc_index]);
2148 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2149 nvhost_dbg_info("gpc_zcb_count[%d] : %d",
2150 gpc_index, gr->gpc_zcb_count[gpc_index]);
2151 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2152 nvhost_dbg_info("gpc_ppc_count[%d] : %d",
2153 gpc_index, gr->gpc_ppc_count[gpc_index]);
2154 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2155 nvhost_dbg_info("gpc_skip_mask[%d] : %d",
2156 gpc_index, gr->gpc_skip_mask[gpc_index]);
2157 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2159 pes_index < gr->pe_count_per_gpc;
2161 nvhost_dbg_info("pes_tpc_count[%d][%d] : %d",
2162 pes_index, gpc_index,
2163 gr->pes_tpc_count[pes_index][gpc_index]);
2165 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2167 pes_index < gr->pe_count_per_gpc;
2169 nvhost_dbg_info("pes_tpc_mask[%d][%d] : %d",
2170 pes_index, gpc_index,
2171 gr->pes_tpc_mask[pes_index][gpc_index]);
2173 gr->bundle_cb_default_size = gr_scc_bundle_cb_size_div_256b__prod_v();
2174 gr->min_gpm_fifo_depth = gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
2175 gr->bundle_cb_token_limit = gr_pd_ab_dist_cfg2_token_limit_init_v();
2176 gr->attrib_cb_default_size = gr_gpc0_ppc0_cbm_cfg_size_default_v();
2177 /* gk20a has a fixed beta CB RAM, don't alloc more */
2178 gr->attrib_cb_size = gr->attrib_cb_default_size;
2179 gr->alpha_cb_default_size = gr_gpc0_ppc0_cbm_cfg2_size_default_v();
2180 gr->alpha_cb_size = gr->alpha_cb_default_size + (gr->alpha_cb_default_size >> 1);
2181 gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v();
2183 nvhost_dbg_info("bundle_cb_default_size: %d",
2184 gr->bundle_cb_default_size);
2185 nvhost_dbg_info("min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
2186 nvhost_dbg_info("bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
2187 nvhost_dbg_info("attrib_cb_default_size: %d",
2188 gr->attrib_cb_default_size);
2189 nvhost_dbg_info("attrib_cb_size: %d", gr->attrib_cb_size);
2190 nvhost_dbg_info("alpha_cb_default_size: %d", gr->alpha_cb_default_size);
2191 nvhost_dbg_info("alpha_cb_size: %d", gr->alpha_cb_size);
2192 nvhost_dbg_info("timeslice_mode: %d", gr->timeslice_mode);
2200 static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
2202 struct mem_mgr *memmgr = mem_mgr_from_g(g);
2203 void *mmu_ptr;
2205 gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000;
2207 gr->mmu_wr_mem.mem.ref = nvhost_memmgr_alloc(memmgr,
2208 gr->mmu_wr_mem_size,
2209 DEFAULT_ALLOC_ALIGNMENT,
2210 DEFAULT_ALLOC_FLAGS,
2212 if (IS_ERR(gr->mmu_wr_mem.mem.ref))
2213 goto clean_up;
2214 gr->mmu_wr_mem.mem.size = gr->mmu_wr_mem_size;
2216 gr->mmu_rd_mem.mem.ref = nvhost_memmgr_alloc(memmgr,
2217 gr->mmu_rd_mem_size,
2218 DEFAULT_ALLOC_ALIGNMENT,
2219 DEFAULT_ALLOC_FLAGS,
2221 if (IS_ERR(gr->mmu_rd_mem.mem.ref))
2222 goto clean_up;
2223 gr->mmu_rd_mem.mem.size = gr->mmu_rd_mem_size;
2225 mmu_ptr = nvhost_memmgr_mmap(gr->mmu_wr_mem.mem.ref);
2226 if (!mmu_ptr)
2227 goto clean_up;
2228 memset(mmu_ptr, 0, gr->mmu_wr_mem.mem.size);
2229 nvhost_memmgr_munmap(gr->mmu_wr_mem.mem.ref, mmu_ptr);
2231 mmu_ptr = nvhost_memmgr_mmap(gr->mmu_rd_mem.mem.ref);
2232 if (!mmu_ptr)
2233 goto clean_up;
2234 memset(mmu_ptr, 0, gr->mmu_rd_mem.mem.size);
2235 nvhost_memmgr_munmap(gr->mmu_rd_mem.mem.ref, mmu_ptr);
2237 gr->mmu_wr_mem.mem.sgt =
2238 nvhost_memmgr_sg_table(memmgr, gr->mmu_wr_mem.mem.ref);
2239 if (IS_ERR(gr->mmu_wr_mem.mem.sgt))
2240 goto clean_up;
2242 gr->mmu_rd_mem.mem.sgt =
2243 nvhost_memmgr_sg_table(memmgr, gr->mmu_rd_mem.mem.ref);
2244 if (IS_ERR(gr->mmu_rd_mem.mem.sgt))
2245 goto clean_up;
2252 static u32 prime_set[18] = {
2253 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
2255 static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
2259 s32 *init_frac = NULL;
2260 s32 *init_err = NULL;
2261 s32 *run_err = NULL;
2262 s32 *sorted_num_tpcs = NULL;
2263 s32 *sorted_to_unsorted_gpc_map = NULL;
2267 u32 max_tpc_count = 0;
2271 bool delete_map = false;
2275 init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2276 init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2277 run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2279 kzalloc(proj_scal_max_gpcs_v() *
2280 proj_scal_max_tpc_per_gpc_v() * sizeof(s32),
2282 sorted_to_unsorted_gpc_map =
2283 kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2285 if (!(init_frac && init_err && run_err && sorted_num_tpcs &&
2286 sorted_to_unsorted_gpc_map)) {
2291 gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;
2293 if (gr->tpc_count == 3)
2294 gr->map_row_offset = 2;
2295 else if (gr->tpc_count < 3)
2296 gr->map_row_offset = 1;
2298 gr->map_row_offset = 3;
2300 for (index = 1; index < 18; index++) {
2301 u32 prime = prime_set[index];
2302 if ((gr->tpc_count % prime) != 0) {
2303 gr->map_row_offset = prime;
2309 switch (gr->tpc_count) {
2311 gr->map_row_offset = 6;
2314 gr->map_row_offset = 5;
2317 gr->map_row_offset = 2;
2320 gr->map_row_offset = 7;
2323 gr->map_row_offset = 6;
2327 gr->map_row_offset = 1;
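/*
 * Net effect of the selection above: for larger TPC counts the loop picks
 * the first prime from index 1 onward (i.e. starting at 3) that does not
 * divide tpc_count, with a few counts special-cased by the switch, so
 * consecutive screen-tile rows start on different tiles when the map
 * repeats.
 */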
2333 if (gr->map_tiles) {
2334 if (gr->map_tile_count != gr->tpc_count)
2337 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
2338 if ((u32)gr->map_tiles[tile_count] >= gr->tpc_count)
2343 kfree(gr->map_tiles);
2344 gr->map_tiles = NULL;
2345 gr->map_tile_count = 0;
2349 if (gr->map_tiles == NULL) {
2350 gr->map_tile_count = proj_scal_max_gpcs_v();
2352 gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL);
2353 if (gr->map_tiles == NULL) {
2358 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2359 sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
2360 sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
2364 while (!gpc_sorted) {
2365 gpc_sorted = true;
2366 for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) {
2367 if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) {
2368 gpc_sorted = false;
2369 swap = sorted_num_tpcs[gpc_index];
2370 sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1];
2371 sorted_num_tpcs[gpc_index + 1] = swap;
2372 swap = sorted_to_unsorted_gpc_map[gpc_index];
2373 sorted_to_unsorted_gpc_map[gpc_index] =
2374 sorted_to_unsorted_gpc_map[gpc_index + 1];
2375 sorted_to_unsorted_gpc_map[gpc_index + 1] = swap;
2380 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2381 if (gr->gpc_tpc_count[gpc_index] > max_tpc_count)
2382 max_tpc_count = gr->gpc_tpc_count[gpc_index];
2384 mul_factor = gr->gpc_count * max_tpc_count;
2385 if (mul_factor & 0x1)
2386 mul_factor = 2;
2387 else
2388 mul_factor = 1;
2390 comm_denom = gr->gpc_count * max_tpc_count * mul_factor;
2392 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2393 num_tpc = sorted_num_tpcs[gpc_index];
2395 init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;
2397 if (num_tpc != 0)
2398 init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
2399 else
2400 init_err[gpc_index] = 0;
2402 run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
2405 while (gpc_mark < gr->tpc_count) {
2406 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2407 if ((run_err[gpc_index] * 2) >= comm_denom) {
2408 gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
2409 run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
2411 run_err[gpc_index] += init_frac[gpc_index];
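/*
 * The loop above is a Bresenham-style error diffusion: each GPC accumulates
 * init_frac per round and emits a tile whenever its running error reaches
 * half of comm_denom, paying comm_denom back. A minimal standalone sketch
 * of the same technique, using hypothetical counts (two GPCs with 3 and 1
 * TPCs) and not part of the driver:
 */
#if 0
static void map_tiles_sketch(void)
{
	s32 frac[2] = { 3 * 2 * 1, 1 * 2 * 1 }; /* num_tpc * gpc_count * mul */
	s32 err[2] = { 3 * 2 * 1, 1 * 2 * 1 };  /* run_err seeded from frac */
	s32 denom = 2 * 3 * 1;                  /* gpc_count * max_tpc * mul */
	u32 mark = 0, gpc;
	u8 tiles[4];

	while (mark < 4) {
		for (gpc = 0; gpc < 2; gpc++) {
			if (err[gpc] * 2 >= denom) {
				tiles[mark++] = (u8)gpc;
				err[gpc] += frac[gpc] - denom;
			} else
				err[gpc] += frac[gpc];
		}
	}
	/* tiles[] is now { 0, 0, 1, 0 }: GPC0 appears 3x as often as GPC1 */
}
#endif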
2420 kfree(sorted_num_tpcs);
2421 kfree(sorted_to_unsorted_gpc_map);
2424 nvhost_dbg(dbg_fn | dbg_err, "fail");
2426 nvhost_dbg_fn("done");
2431 static int gr_gk20a_init_comptag(struct gk20a *g, struct gr_gk20a *gr)
2433 struct mem_mgr *memmgr = mem_mgr_from_g(g);
2435 /* max memory size (MB) to cover */
2436 u32 max_size = gr->max_comptag_mem;
2437 /* one tag line covers 128KB */
2438 u32 max_comptag_lines = max_size << 3;
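/* max_size is in MB and one tag line covers 128KB, so
 * lines = MB * (1MB / 128KB) = MB * 8, hence the << 3. */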
2440 u32 hw_max_comptag_lines =
2441 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();
2444 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
2445 u32 comptags_per_cacheline =
2446 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
2447 u32 slices_per_fbp =
2448 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(cbc_param);
2449 u32 cacheline_size =
2450 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
2452 u32 compbit_backing_size;
2457 if (max_comptag_lines == 0) {
2458 gr->compbit_store.mem.size = 0;
2462 if (max_comptag_lines > hw_max_comptag_lines)
2463 max_comptag_lines = hw_max_comptag_lines;
2466 compbit_backing_size =
2467 DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
2468 cacheline_size * slices_per_fbp * gr->num_fbps;
2470 /* aligned to 2KB * num_fbps */
2471 compbit_backing_size +=
2472 gr->num_fbps << ltc_ltc0_lts0_cbc_base_alignment_shift_v();
2474 /* must be a multiple of 64KB */
2475 compbit_backing_size = roundup(compbit_backing_size, 64*1024);
2477 max_comptag_lines =
2478 (compbit_backing_size * comptags_per_cacheline) /
2479 (cacheline_size * slices_per_fbp * gr->num_fbps);
2481 if (max_comptag_lines > hw_max_comptag_lines)
2482 max_comptag_lines = hw_max_comptag_lines;
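/*
 * Worked example with hypothetical parameters: for 16384 comptag lines,
 * 32 comptags per cacheline, 512B cachelines, 2 slices per FBP and 1 FBP,
 * the store is DIV_ROUND_UP(16384, 32) * 512 * 2 * 1 = 512KB, plus the
 * 2KB-per-FBP alignment pad, rounded up to the next 64KB multiple (576KB).
 */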
2484 nvhost_dbg_info("compbit backing store size : %d",
2485 compbit_backing_size);
2486 nvhost_dbg_info("max comptag lines : %d",
2489 gr->compbit_store.mem.ref =
2490 nvhost_memmgr_alloc(memmgr, compbit_backing_size,
2491 DEFAULT_ALLOC_ALIGNMENT,
2492 DEFAULT_ALLOC_FLAGS,
2494 if (IS_ERR(gr->compbit_store.mem.ref)) {
2495 nvhost_err(dev_from_gk20a(g), "failed to allocate "
2496 "backing store for compbit : size %d",
2497 compbit_backing_size);
2498 return PTR_ERR(gr->compbit_store.mem.ref);
2500 gr->compbit_store.mem.size = compbit_backing_size;
2502 gr->compbit_store.mem.sgt =
2503 nvhost_memmgr_sg_table(memmgr, gr->compbit_store.mem.ref);
2504 if (IS_ERR(gr->compbit_store.mem.sgt)) {
2505 ret = PTR_ERR(gr->compbit_store.mem.sgt);
2508 #ifdef CONFIG_TEGRA_IOMMU_SMMU
2509 ret = nvhost_memmgr_smmu_map(gr->compbit_store.mem.sgt,
2510 compbit_backing_size, dev_from_gk20a(g));
2514 gr->compbit_store.base_pa =
2515 gk20a_mm_iova_addr(gr->compbit_store.mem.sgt->sgl);
2517 nvhost_allocator_init(&gr->comp_tags, "comptag",
2519 max_comptag_lines - 1, /* length*/
2525 if (gr->compbit_store.mem.sgt)
2526 nvhost_memmgr_free_sg_table(memmgr, gr->compbit_store.mem.ref,
2527 gr->compbit_store.mem.sgt);
2528 nvhost_memmgr_put(memmgr, gr->compbit_store.mem.ref);
2532 int gk20a_gr_clear_comptags(struct gk20a *g, u32 min, u32 max)
2534 struct gr_gk20a *gr = &g->gr;
2535 u32 fbp, slice, ctrl1, val;
2536 unsigned long end_jiffies = jiffies +
2537 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
2538 u32 delay = GR_IDLE_CHECK_DEFAULT;
2539 u32 slices_per_fbp =
2540 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
2541 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
2545 if (gr->compbit_store.mem.size == 0)
2548 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
2549 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
2550 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
2551 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
2552 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
2553 gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) |
2554 ltc_ltcs_ltss_cbc_ctrl1_clear_active_f());
2556 for (fbp = 0; fbp < gr->num_fbps; fbp++) {
2557 for (slice = 0; slice < slices_per_fbp; slice++) {
2559 delay = GR_IDLE_CHECK_DEFAULT;
2561 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
2562 fbp * proj_ltc_pri_stride_v() +
2563 slice * proj_lts_pri_stride_v();
2566 val = gk20a_readl(g, ctrl1);
2567 if (ltc_ltc0_lts0_cbc_ctrl1_clear_v(val) !=
2568 ltc_ltc0_lts0_cbc_ctrl1_clear_active_v())
2571 usleep_range(delay, delay * 2);
2572 delay = min_t(u32, delay << 1,
2573 GR_IDLE_CHECK_MAX);
2575 } while (time_before(jiffies, end_jiffies) ||
2576 !tegra_platform_is_silicon());
2578 if (!time_before(jiffies, end_jiffies)) {
2579 nvhost_err(dev_from_gk20a(g),
2580 "comp tag clear timeout\n");
2589 static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr)
2591 struct gr_zcull_gk20a *zcull = &gr->zcull;
2593 zcull->aliquot_width = gr->tpc_count * 16;
2594 zcull->aliquot_height = 16;
2596 zcull->width_align_pixels = gr->tpc_count * 16;
2597 zcull->height_align_pixels = 32;
2599 zcull->aliquot_size =
2600 zcull->aliquot_width * zcull->aliquot_height;
2602 /* assume no floor sweeping since we only have 1 tpc in 1 gpc */
2603 zcull->pixel_squares_by_aliquots =
2604 gr->zcb_count * 16 * 16 * gr->tpc_count /
2605 (gr->gpc_count * gr->gpc_tpc_count[0]);
2607 zcull->total_aliquots =
2608 gr_gpc0_zcull_total_ram_size_num_aliquots_f(
2609 gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r()));
2614 u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
2616 /* assuming gr has already been initialized */
2617 return gr->ctx_vars.zcull_ctxsw_image_size;
2620 int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
2621 struct channel_gk20a *c, u64 zcull_va, u32 mode)
2623 struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx;
2625 zcull_ctx->ctx_sw_mode = mode;
2626 zcull_ctx->gpu_va = zcull_va;
2628 /* TBD: don't disable channel in sw method processing */
2629 return gr_gk20a_ctx_zcull_setup(g, c, true);
2632 int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
2633 struct gr_zcull_info *zcull_params)
2635 struct gr_zcull_gk20a *zcull = &gr->zcull;
2637 zcull_params->width_align_pixels = zcull->width_align_pixels;
2638 zcull_params->height_align_pixels = zcull->height_align_pixels;
2639 zcull_params->pixel_squares_by_aliquots =
2640 zcull->pixel_squares_by_aliquots;
2641 zcull_params->aliquot_total = zcull->total_aliquots;
2643 zcull_params->region_byte_multiplier =
2644 gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v();
2645 zcull_params->region_header_size =
2646 proj_scal_litter_num_gpcs_v() *
2647 gr_zcull_save_restore_header_bytes_per_gpc_v();
2649 zcull_params->subregion_header_size =
2650 proj_scal_litter_num_gpcs_v() *
2651 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
2653 zcull_params->subregion_width_align_pixels =
2654 gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
2655 zcull_params->subregion_height_align_pixels =
2656 gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
2657 zcull_params->subregion_count = gr_zcull_subregion_qty_v();
2662 static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
2663 struct zbc_entry *color_val, u32 index)
2665 struct fifo_gk20a *f = &g->fifo;
2666 struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
2668 unsigned long end_jiffies = jiffies +
2669 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
2672 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
2674 nvhost_err(dev_from_gk20a(g),
2675 "failed to disable gr engine activity\n");
2679 ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
2681 nvhost_err(dev_from_gk20a(g),
2682 "failed to idle graphics\n");
2686 /* update l2 table */
2687 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
2688 (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
2689 ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
2690 ltc_ltcs_ltss_dstg_zbc_index_address_f(index +
2691 GK20A_STARTOF_ZBC_TABLE));
2693 for (i = 0; i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++)
2694 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i),
2695 color_val->color_l2[i]);
2697 /* update ds table */
2698 gk20a_writel(g, gr_ds_zbc_color_r_r(),
2699 gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
2700 gk20a_writel(g, gr_ds_zbc_color_g_r(),
2701 gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
2702 gk20a_writel(g, gr_ds_zbc_color_b_r(),
2703 gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
2704 gk20a_writel(g, gr_ds_zbc_color_a_r(),
2705 gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
2707 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
2708 gr_ds_zbc_color_fmt_val_f(color_val->format));
2710 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
2711 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
2713 /* trigger the write */
2714 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
2715 gr_ds_zbc_tbl_ld_select_c_f() |
2716 gr_ds_zbc_tbl_ld_action_write_f() |
2717 gr_ds_zbc_tbl_ld_trigger_active_f());
2719 /* update local copy */
2720 for (i = 0; i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) {
2721 gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
2722 gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
2724 gr->zbc_col_tbl[index].format = color_val->format;
2725 gr->zbc_col_tbl[index].ref_cnt++;
2728 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
2730 nvhost_err(dev_from_gk20a(g),
2731 "failed to enable gr engine activity\n");
2737 static int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
2738 struct zbc_entry *depth_val, u32 index)
2740 struct fifo_gk20a *f = &g->fifo;
2741 struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
2742 unsigned long end_jiffies = jiffies +
2743 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
2746 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
2748 nvhost_err(dev_from_gk20a(g),
2749 "failed to disable gr engine activity\n");
2753 ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
2755 nvhost_err(dev_from_gk20a(g),
2756 "failed to idle graphics\n");
2760 /* update l2 table */
2761 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
2762 (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
2763 ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
2764 ltc_ltcs_ltss_dstg_zbc_index_address_f(index +
2765 GK20A_STARTOF_ZBC_TABLE));
2767 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(),
2770 /* update ds table */
2771 gk20a_writel(g, gr_ds_zbc_z_r(),
2772 gr_ds_zbc_z_val_f(depth_val->depth));
2774 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
2775 gr_ds_zbc_z_fmt_val_f(depth_val->format));
2777 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
2778 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
2780 /* trigger the write */
2781 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
2782 gr_ds_zbc_tbl_ld_select_z_f() |
2783 gr_ds_zbc_tbl_ld_action_write_f() |
2784 gr_ds_zbc_tbl_ld_trigger_active_f());
2786 /* update local copy */
2787 gr->zbc_dep_tbl[index].depth = depth_val->depth;
2788 gr->zbc_dep_tbl[index].format = depth_val->format;
2789 gr->zbc_dep_tbl[index].ref_cnt++;
2792 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
2794 nvhost_err(dev_from_gk20a(g),
2795 "failed to enable gr engine activity\n");
2801 int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
2802 struct zbc_entry *zbc_val)
2804 struct zbc_color_table *c_tbl;
2805 struct zbc_depth_table *d_tbl;
2806 u32 i, ret = -ENOMEM;
2809 /* no endian swap ? */
2811 switch (zbc_val->type) {
2812 case GK20A_ZBC_TYPE_COLOR:
2813 /* search existing tables */
2814 for (i = 0; i < gr->max_used_color_index; i++) {
2816 c_tbl = &gr->zbc_col_tbl[i];
2818 if (c_tbl->ref_cnt && c_tbl->format == zbc_val->format &&
2819 memcmp(c_tbl->color_ds, zbc_val->color_ds,
2820 sizeof(zbc_val->color_ds)) == 0) {
2822 if (memcmp(c_tbl->color_l2, zbc_val->color_l2,
2823 sizeof(zbc_val->color_l2))) {
2824 nvhost_err(dev_from_gk20a(g),
2825 "zbc l2 and ds color don't match with existing entries");
2836 gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) {
2839 &gr->zbc_col_tbl[gr->max_used_color_index];
2840 WARN_ON(c_tbl->ref_cnt != 0);
2842 ret = gr_gk20a_add_zbc_color(g, gr,
2843 zbc_val, gr->max_used_color_index);
2846 gr->max_used_color_index++;
2849 case GK20A_ZBC_TYPE_DEPTH:
2850 /* search existing tables */
2851 for (i = 0; i < gr->max_used_depth_index; i++) {
2853 d_tbl = &gr->zbc_dep_tbl[i];
2855 if (d_tbl->ref_cnt &&
2856 d_tbl->depth == zbc_val->depth &&
2857 d_tbl->format == zbc_val->format) {
2866 gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) {
2869 &gr->zbc_dep_tbl[gr->max_used_depth_index];
2870 WARN_ON(d_tbl->ref_cnt != 0);
2872 ret = gr_gk20a_add_zbc_depth(g, gr,
2873 zbc_val, gr->max_used_depth_index);
2876 gr->max_used_depth_index++;
2880 nvhost_err(dev_from_gk20a(g),
2881 "invalid zbc table type %d", zbc_val->type);
2885 if (added && ret == 0) {
2886 /* update zbc for elpg */
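/*
 * Typical caller-side use of the path above (illustrative sketch only;
 * the values mirror the default-table load further below):
 */
#if 0
	struct zbc_entry val;
	int i, err;

	val.type = GK20A_ZBC_TYPE_COLOR;
	val.format = gr_ds_zbc_color_fmt_val_unorm_one_v();
	for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
		val.color_ds[i] = 0xffffffff; /* what the DS unit sees */
		val.color_l2[i] = 0x3f800000; /* 1.0f bit pattern for L2 */
	}
	err = gr_gk20a_add_zbc(g, &g->gr, &val);
#endif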
2892 int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
2894 struct fifo_gk20a *f = &g->fifo;
2895 struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
2897 unsigned long end_jiffies = jiffies +
2898 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
2901 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
2903 nvhost_err(dev_from_gk20a(g),
2904 "failed to disable gr engine activity\n");
2908 ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
2910 nvhost_err(dev_from_gk20a(g),
2911 "failed to idle graphics\n");
2915 for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
2916 gr->zbc_col_tbl[i].format = 0;
2917 gr->zbc_col_tbl[i].ref_cnt = 0;
2919 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
2920 gr_ds_zbc_color_fmt_val_invalid_f());
2921 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
2922 gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
2924 /* trigger the write */
2925 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
2926 gr_ds_zbc_tbl_ld_select_c_f() |
2927 gr_ds_zbc_tbl_ld_action_write_f() |
2928 gr_ds_zbc_tbl_ld_trigger_active_f());
2930 /* clear l2 table */
2931 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
2932 (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
2933 ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
2934 ltc_ltcs_ltss_dstg_zbc_index_address_f(i +
2935 GK20A_STARTOF_ZBC_TABLE));
2937 for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++) {
2938 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0);
2939 gr->zbc_col_tbl[i].color_l2[j] = 0;
2940 gr->zbc_col_tbl[i].color_ds[j] = 0;
2943 gr->max_used_color_index = 0;
2944 gr->max_default_color_index = 0;
2946 for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
2947 gr->zbc_dep_tbl[i].depth = 0;
2948 gr->zbc_dep_tbl[i].format = 0;
2949 gr->zbc_dep_tbl[i].ref_cnt = 0;
2951 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
2952 gr_ds_zbc_z_fmt_val_invalid_f());
2953 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
2954 gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
2956 /* trigger the write */
2957 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
2958 gr_ds_zbc_tbl_ld_select_z_f() |
2959 gr_ds_zbc_tbl_ld_action_write_f() |
2960 gr_ds_zbc_tbl_ld_trigger_active_f());
2962 /* clear l2 table */
2963 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
2964 (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
2965 ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
2966 ltc_ltcs_ltss_dstg_zbc_index_address_f(i +
2967 GK20A_STARTOF_ZBC_TABLE));
2969 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
2971 gr->max_used_depth_index = 0;
2972 gr->max_default_depth_index = 0;
2975 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
2977 nvhost_err(dev_from_gk20a(g),
2978 "failed to enable gr engine activity\n");
2986 /* get a zbc table entry specified by index
2987 * return table size when type is invalid */
2988 int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
2989 struct zbc_query_params *query_params)
2991 u32 index = query_params->index_size;
2994 switch (query_params->type) {
2995 case GK20A_ZBC_TYPE_INVALID:
2996 query_params->index_size = GK20A_ZBC_TABLE_SIZE;
2998 case GK20A_ZBC_TYPE_COLOR:
2999 if (index >= GK20A_ZBC_TABLE_SIZE) {
3000 nvhost_err(dev_from_gk20a(g),
3001 "invalid zbc color table index\n");
3004 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3005 query_params->color_l2[i] =
3006 gr->zbc_col_tbl[index].color_l2[i];
3007 query_params->color_ds[i] =
3008 gr->zbc_col_tbl[index].color_ds[i];
3010 query_params->format = gr->zbc_col_tbl[index].format;
3011 query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt;
3013 case GK20A_ZBC_TYPE_DEPTH:
3014 if (index >= GK20A_ZBC_TABLE_SIZE) {
3015 nvhost_err(dev_from_gk20a(g),
3016 "invalid zbc depth table index\n");
3019 query_params->depth = gr->zbc_dep_tbl[index].depth;
3020 query_params->format = gr->zbc_dep_tbl[index].format;
3021 query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt;
3024 nvhost_err(dev_from_gk20a(g),
3025 "invalid zbc table type\n");
3032 static int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
3034 struct zbc_entry zbc_val;
3037 /* load default color table */
3038 zbc_val.type = GK20A_ZBC_TYPE_COLOR;
3040 zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v();
3041 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3042 zbc_val.color_ds[i] = 0;
3043 zbc_val.color_l2[i] = 0;
3045 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
3047 zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v();
3048 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3049 zbc_val.color_ds[i] = 0xffffffff;
3050 zbc_val.color_l2[i] = 0x3f800000;
3052 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3054 zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
3055 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3056 zbc_val.color_ds[i] = 0;
3057 zbc_val.color_l2[i] = 0;
3059 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3061 zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
3062 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3063 zbc_val.color_ds[i] = 0x3f800000;
3064 zbc_val.color_l2[i] = 0x3f800000;
3066 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3068 if (!err)
3069 gr->max_default_color_index = 4;
3070 else {
3071 nvhost_err(dev_from_gk20a(g),
3072 "fail to load default zbc color table\n");
3073 return err;
3074 }
3076 /* load default depth table */
3077 zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
3079 zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
3080 zbc_val.depth = 0;
3081 err = gr_gk20a_add_zbc(g, gr, &zbc_val);
3083 zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
3084 zbc_val.depth = 0x3f800000;
3085 err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3087 if (!err)
3088 gr->max_default_depth_index = 2;
3089 else {
3090 nvhost_err(dev_from_gk20a(g),
3091 "fail to load default zbc depth table\n");
3092 return err;
3093 }
3098 static int gr_gk20a_init_zbc(struct gk20a *g, struct gr_gk20a *gr)
3102 /* reset zbc clear */
3103 for (i = 0; i < GK20A_SIZEOF_ZBC_TABLE -
3104 GK20A_STARTOF_ZBC_TABLE; i++) {
3105 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
3106 (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
3107 ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
3108 ltc_ltcs_ltss_dstg_zbc_index_address_f(
3109 i + GK20A_STARTOF_ZBC_TABLE));
3110 for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++)
3111 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0);
3112 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
3115 gr_gk20a_clear_zbc_table(g, gr);
3117 gr_gk20a_load_zbc_default_table(g, gr);
3122 int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
3123 struct zbc_entry *zbc_val)
3127 return gr_gk20a_elpg_protected_call(g,
3128 gr_gk20a_add_zbc(g, gr, zbc_val));
3131 void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine)
3133 u32 gate_ctrl, idle_filter;
3135 gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine));
3139 gate_ctrl = set_field(gate_ctrl,
3140 therm_gate_ctrl_eng_clk_m(),
3141 therm_gate_ctrl_eng_clk_run_f());
3142 gate_ctrl = set_field(gate_ctrl,
3143 therm_gate_ctrl_eng_pwr_m(),
3144 /* set elpg to auto to meet hw expectation */
3145 therm_gate_ctrl_eng_pwr_auto_f());
3148 gate_ctrl = set_field(gate_ctrl,
3149 therm_gate_ctrl_eng_clk_m(),
3150 therm_gate_ctrl_eng_clk_stop_f());
3153 gate_ctrl = set_field(gate_ctrl,
3154 therm_gate_ctrl_eng_clk_m(),
3155 therm_gate_ctrl_eng_clk_auto_f());
3158 nvhost_err(dev_from_gk20a(g),
3159 "invalid elcg mode %d", mode);
3162 if (tegra_platform_is_linsim()) {
3163 gate_ctrl = set_field(gate_ctrl,
3164 therm_gate_ctrl_eng_delay_after_m(),
3165 therm_gate_ctrl_eng_delay_after_f(4));
3168 /* 2 * (1 << 9) = 1024 clks */
3169 gate_ctrl = set_field(gate_ctrl,
3170 therm_gate_ctrl_eng_idle_filt_exp_m(),
3171 therm_gate_ctrl_eng_idle_filt_exp_f(9));
3172 gate_ctrl = set_field(gate_ctrl,
3173 therm_gate_ctrl_eng_idle_filt_mant_m(),
3174 therm_gate_ctrl_eng_idle_filt_mant_f(2));
3175 gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl);
3177 /* default fecs_idle_filter to 0 */
3178 idle_filter = gk20a_readl(g, therm_fecs_idle_filter_r());
3179 idle_filter &= ~therm_fecs_idle_filter_value_m();
3180 gk20a_writel(g, therm_fecs_idle_filter_r(), idle_filter);
3181 /* default hubmmu_idle_filter to 0 */
3182 idle_filter = gk20a_readl(g, therm_hubmmu_idle_filter_r());
3183 idle_filter &= ~therm_hubmmu_idle_filter_value_m();
3184 gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter);
3187 static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
3189 u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
3190 u32 *zcull_map_tiles, *zcull_bank_counters;
3194 bool floorsweep = false;
3199 zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() *
3200 proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
3201 zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() *
3202 proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
3204 if (!zcull_map_tiles || !zcull_bank_counters) {
3205 nvhost_err(dev_from_gk20a(g),
3206 "failed to allocate zcull temp buffers");
3210 for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
3211 zcull_map_tiles[map_counter] =
3212 zcull_bank_counters[gr->map_tiles[map_counter]];
3213 zcull_bank_counters[gr->map_tiles[map_counter]]++;
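/*
 * zcull_bank_counters[gpc] counts how many tiles have already landed on
 * that GPC, so zcull_map_tiles[i] becomes the per-GPC zcull bank index of
 * tile i; the map0..map3 writes below pack those 32 entries into four
 * registers, eight tiles each.
 */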
3216 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(),
3217 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) |
3218 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) |
3219 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) |
3220 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) |
3221 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) |
3222 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) |
3223 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) |
3224 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7]));
3226 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(),
3227 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) |
3228 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) |
3229 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) |
3230 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) |
3231 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) |
3232 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) |
3233 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) |
3234 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15]));
3236 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(),
3237 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) |
3238 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) |
3239 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) |
3240 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) |
3241 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) |
3242 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) |
3243 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) |
3244 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23]));
3246 gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(),
3247 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) |
3248 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) |
3249 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) |
3250 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) |
3251 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) |
3252 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) |
3253 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) |
3254 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31]));
3256 kfree(zcull_map_tiles);
3257 kfree(zcull_bank_counters);
3259 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3260 gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
3261 gpc_zcull_count = gr->gpc_zcb_count[gpc_index];
3263 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
3264 gpc_zcull_count < gpc_tpc_count) {
3265 nvhost_err(dev_from_gk20a(g),
3266 "zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
3267 gpc_zcull_count, gpc_tpc_count, gpc_index);
3270 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
3271 gpc_zcull_count != 0)
3272 floorsweep = true;
3275 /* 1.0f / 1.0f * gr_gpc0_zcull_sm_num_rcp_conservative__max_v() */
3276 rcp_conserv = gr_gpc0_zcull_sm_num_rcp_conservative__max_v();
3278 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3279 offset = gpc_index * proj_gpc_stride_v();
3281 if (floorsweep)
3282 gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
3283 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
3284 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
3285 gr->max_zcull_per_gpc_count));
3286 else
3287 gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
3288 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
3289 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
3290 gr->gpc_tpc_count[gpc_index]));
3293 gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
3294 gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) |
3295 gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count));
3297 gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
3298 gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
3301 gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
3302 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
3307 static int gk20a_init_gr_setup_hw(struct gk20a *g)
3309 struct gr_gk20a *gr = &g->gr;
3310 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
3311 struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init;
3312 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
3314 u32 addr_lo, addr_hi, addr;
3315 u32 compbit_base_post_divide;
3316 u64 compbit_base_post_multiply64;
3317 unsigned long end_jiffies = jiffies +
3318 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
3319 u32 fe_go_idle_timeout_save;
3320 u32 last_bundle_data = 0;
3321 u32 last_method_data = 0;
3323 u32 l1c_dbg_reg_val;
3327 /* slcg prod values */
3328 gr_gk20a_slcg_gr_load_gating_prod(g, true);
3329 gr_gk20a_slcg_perf_load_gating_prod(g, true);
3331 /* init mmu debug buffer */
3332 addr_lo = u64_lo32(sg_phys(gr->mmu_wr_mem.mem.sgt->sgl));
3333 addr_hi = u64_hi32(sg_phys(gr->mmu_wr_mem.mem.sgt->sgl));
3334 addr = (addr_lo >> fb_mmu_debug_wr_addr_alignment_v()) |
3335 (addr_hi << (32 - fb_mmu_debug_wr_addr_alignment_v()));
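/*
 * The aligned physical address is packed into a 32-bit register field:
 * the low word drops its alignment bits and the high word supplies the
 * upper bits, i.e. addr ends up as pa >> alignment_shift.
 */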
3337 gk20a_writel(g, fb_mmu_debug_wr_r(),
3338 fb_mmu_debug_wr_aperture_vid_mem_f() |
3339 fb_mmu_debug_wr_vol_false_f() |
3340 fb_mmu_debug_wr_addr_v(addr));
3342 addr_lo = u64_lo32(sg_phys(gr->mmu_rd_mem.mem.sgt->sgl));
3343 addr_hi = u64_hi32(sg_phys(gr->mmu_rd_mem.mem.sgt->sgl));
3344 addr = (addr_lo >> fb_mmu_debug_rd_addr_alignment_v()) |
3345 (addr_hi << (32 - fb_mmu_debug_rd_addr_alignment_v()));
3347 gk20a_writel(g, fb_mmu_debug_rd_r(),
3348 fb_mmu_debug_rd_aperture_vid_mem_f() |
3349 fb_mmu_debug_rd_vol_false_f() |
3350 fb_mmu_debug_rd_addr_v(addr));
3352 /* load gr floorsweeping registers */
3353 data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r());
3354 data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(),
3355 gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f());
3356 gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data);
3358 gr_gk20a_zcull_init_hw(g, gr);
3360 gr_gk20a_blcg_gr_load_gating_prod(g, true);
3361 gr_gk20a_pg_gr_load_gating_prod(g, true);
3363 gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
3364 gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
3366 /* Bug 1340570: increase the clock timeout to avoid potential
3367 * operation failure at high gpcclk rate. Default values are 0x400.
3369 gk20a_writel(g, pri_ringstation_sys_master_config_r(0x15), 0x800);
3370 gk20a_writel(g, pri_ringstation_gpc_master_config_r(0xa), 0x800);
3371 gk20a_writel(g, pri_ringstation_fbp_master_config_r(0x8), 0x800);
3373 /* enable fifo access */
3374 gk20a_writel(g, gr_gpfifo_ctl_r(),
3375 gr_gpfifo_ctl_access_enabled_f() |
3376 gr_gpfifo_ctl_semaphore_access_enabled_f());
3378 /* TBD: reload gr ucode when needed */
3380 /* enable interrupts */
3381 gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF);
3382 gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF);
3384 /* enable fecs error interrupts */
3385 gk20a_writel(g, gr_fecs_host_int_enable_r(),
3386 gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() |
3387 gr_fecs_host_int_enable_umimp_firmware_method_enable_f() |
3388 gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
3389 gr_fecs_host_int_enable_watchdog_enable_f());
3391 /* enable exceptions */
3392 gk20a_writel(g, gr_fe_hww_esr_r(),
3393 gr_fe_hww_esr_en_enable_f() |
3394 gr_fe_hww_esr_reset_active_f());
3395 gk20a_writel(g, gr_memfmt_hww_esr_r(),
3396 gr_memfmt_hww_esr_en_enable_f() |
3397 gr_memfmt_hww_esr_reset_active_f());
3398 gk20a_writel(g, gr_scc_hww_esr_r(),
3399 gr_scc_hww_esr_en_enable_f() |
3400 gr_scc_hww_esr_reset_active_f());
3401 gk20a_writel(g, gr_mme_hww_esr_r(),
3402 gr_mme_hww_esr_en_enable_f() |
3403 gr_mme_hww_esr_reset_active_f());
3404 gk20a_writel(g, gr_pd_hww_esr_r(),
3405 gr_pd_hww_esr_en_enable_f() |
3406 gr_pd_hww_esr_reset_active_f());
3407 gk20a_writel(g, gr_sked_hww_esr_r(), /* enabled by default */
3408 gr_sked_hww_esr_reset_active_f());
3409 gk20a_writel(g, gr_ds_hww_esr_r(),
3410 gr_ds_hww_esr_en_enabled_f() |
3411 gr_ds_hww_esr_reset_task_f());
3412 gk20a_writel(g, gr_ds_hww_report_mask_r(),
3413 gr_ds_hww_report_mask_sph0_err_report_f() |
3414 gr_ds_hww_report_mask_sph1_err_report_f() |
3415 gr_ds_hww_report_mask_sph2_err_report_f() |
3416 gr_ds_hww_report_mask_sph3_err_report_f() |
3417 gr_ds_hww_report_mask_sph4_err_report_f() |
3418 gr_ds_hww_report_mask_sph5_err_report_f() |
3419 gr_ds_hww_report_mask_sph6_err_report_f() |
3420 gr_ds_hww_report_mask_sph7_err_report_f() |
3421 gr_ds_hww_report_mask_sph8_err_report_f() |
3422 gr_ds_hww_report_mask_sph9_err_report_f() |
3423 gr_ds_hww_report_mask_sph10_err_report_f() |
3424 gr_ds_hww_report_mask_sph11_err_report_f() |
3425 gr_ds_hww_report_mask_sph12_err_report_f() |
3426 gr_ds_hww_report_mask_sph13_err_report_f() |
3427 gr_ds_hww_report_mask_sph14_err_report_f() |
3428 gr_ds_hww_report_mask_sph15_err_report_f() |
3429 gr_ds_hww_report_mask_sph16_err_report_f() |
3430 gr_ds_hww_report_mask_sph17_err_report_f() |
3431 gr_ds_hww_report_mask_sph18_err_report_f() |
3432 gr_ds_hww_report_mask_sph19_err_report_f() |
3433 gr_ds_hww_report_mask_sph20_err_report_f() |
3434 gr_ds_hww_report_mask_sph21_err_report_f() |
3435 gr_ds_hww_report_mask_sph22_err_report_f() |
3436 gr_ds_hww_report_mask_sph23_err_report_f());
3438 /* TBD: ECC for L1/SM */
3439 /* TBD: enable per GPC exceptions */
3440 /* TBD: enable per BE exceptions */
3442 /* reset and enable all exceptions */
3443 gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF);
3444 gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF);
3445 gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF);
3446 gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF);
3447 gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF);
3448 gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF);
3450 /* ignore status from some units */
3451 data = gk20a_readl(g, gr_status_mask_r());
3452 gk20a_writel(g, gr_status_mask_r(), data & gr->status_disable_mask);
3454 gr_gk20a_init_zbc(g, gr);
3457 u64 compbit_base_post_divide64 = (gr->compbit_store.base_pa >>
3458 ltc_ltc0_lts0_cbc_base_alignment_shift_v());
3459 do_div(compbit_base_post_divide64, gr->num_fbps);
3460 compbit_base_post_divide = u64_lo32(compbit_base_post_divide64);
3463 compbit_base_post_multiply64 = ((u64)compbit_base_post_divide *
3464 gr->num_fbps) << ltc_ltc0_lts0_cbc_base_alignment_shift_v();
3466 if (compbit_base_post_multiply64 < gr->compbit_store.base_pa)
3467 compbit_base_post_divide++;
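/*
 * do_div rounds down, so multiplying back and comparing against base_pa
 * detects a lost remainder; bumping the quotient turns the division into
 * a round-up, which stays inside the allocation because the backing size
 * was padded by num_fbps << alignment_shift when it was computed.
 */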
3469 gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(),
3470 compbit_base_post_divide);
3472 nvhost_dbg(dbg_info | dbg_map | dbg_pte,
3473 "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n",
3474 (u32)(gr->compbit_store.base_pa>>32),
3475 (u32)(gr->compbit_store.base_pa & 0xffffffff),
3476 compbit_base_post_divide);
3479 for (i = 0; i < sw_ctx_load->count; i++)
3480 gk20a_writel(g, sw_ctx_load->l[i].addr,
3481 sw_ctx_load->l[i].value);
3483 /* TBD: add gr ctx overrides */
3485 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
3489 /* save and disable fe_go_idle */
3490 fe_go_idle_timeout_save =
3491 gk20a_readl(g, gr_fe_go_idle_timeout_r());
3492 gk20a_writel(g, gr_fe_go_idle_timeout_r(),
3493 (fe_go_idle_timeout_save & gr_fe_go_idle_timeout_count_f(0)) |
3494 gr_fe_go_idle_timeout_count_disabled_f());
3496 /* override a few ctx state registers */
3497 gr_gk20a_commit_global_cb_manager(g, NULL, 0);
3498 gr_gk20a_commit_global_timeslice(g, NULL, 0);
3500 /* floorsweep anything left */
3501 gr_gk20a_ctx_state_floorsweep(g);
3503 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
3505 goto restore_fe_go_idle;
3507 /* enable pipe mode override */
3508 gk20a_writel(g, gr_pipe_bundle_config_r(),
3509 gr_pipe_bundle_config_override_pipe_mode_enabled_f());
3511 /* load bundle init */
3513 for (i = 0; i < sw_bundle_init->count; i++) {
3515 if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) {
3516 gk20a_writel(g, gr_pipe_bundle_data_r(),
3517 sw_bundle_init->l[i].value);
3518 last_bundle_data = sw_bundle_init->l[i].value;
3521 gk20a_writel(g, gr_pipe_bundle_address_r(),
3522 sw_bundle_init->l[i].addr);
3524 if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
3525 GR_GO_IDLE_BUNDLE)
3526 err |= gr_gk20a_wait_idle(g, end_jiffies,
3527 GR_IDLE_CHECK_DEFAULT);
3528 else if (0) { /* IS_SILICON */
3529 u32 delay = GR_IDLE_CHECK_DEFAULT;
3531 u32 gr_status = gk20a_readl(g, gr_status_r());
3533 if (gr_status_fe_method_lower_v(gr_status) ==
3534 gr_status_fe_method_lower_idle_v())
3537 usleep_range(delay, delay * 2);
3538 delay = min_t(u32, delay << 1,
3539 GR_IDLE_CHECK_MAX);
3541 } while (time_before(jiffies, end_jiffies) ||
3542 !tegra_platform_is_silicon());
3546 /* disable pipe mode override */
3547 gk20a_writel(g, gr_pipe_bundle_config_r(),
3548 gr_pipe_bundle_config_override_pipe_mode_disabled_f());
3551 /* restore fe_go_idle */
3552 gk20a_writel(g, gr_fe_go_idle_timeout_r(), fe_go_idle_timeout_save);
3554 if (err || gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT))
3557 /* load method init */
3558 if (sw_method_init->count) {
3559 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
3560 sw_method_init->l[0].value);
3561 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
3562 gr_pri_mme_shadow_raw_index_write_trigger_f() |
3563 sw_method_init->l[0].addr);
3564 last_method_data = sw_method_init->l[0].value;
3566 for (i = 1; i < sw_method_init->count; i++) {
3567 if (sw_method_init->l[i].value != last_method_data) {
3568 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
3569 sw_method_init->l[i].value);
3570 last_method_data = sw_method_init->l[i].value;
3572 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
3573 gr_pri_mme_shadow_raw_index_write_trigger_f() |
3574 sw_method_init->l[i].addr);
3577 gk20a_mm_l2_invalidate(g);
3579 /* hack: using hard-coded bits for now until
3580 * the l1c_dbg register makes it into hw_gr_gk20a.h
3581 */
3584 l1c_dbg_reg_val = gk20a_readl(g, 0x005044b0);
3585 /* set the cya15 bit (27:27) to 1 */
3586 l1c_dbg_reg_val |= 0x08000000;
3587 gk20a_writel(g, 0x005044b0, l1c_dbg_reg_val);
3590 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
3595 nvhost_dbg_fn("done");
3599 static int gk20a_init_gr_prepare(struct gk20a *g)
3601 u32 gpfifo_ctrl, pmc_en;
3604 /* disable fifo access */
3605 gpfifo_ctrl = gk20a_readl(g, gr_gpfifo_ctl_r());
3606 gpfifo_ctrl &= ~gr_gpfifo_ctl_access_enabled_f();
3607 gk20a_writel(g, gr_gpfifo_ctl_r(), gpfifo_ctrl);
3609 /* reset gr engine */
3610 pmc_en = gk20a_readl(g, mc_enable_r());
3611 pmc_en &= ~mc_enable_pgraph_enabled_f();
3612 pmc_en &= ~mc_enable_blg_enabled_f();
3613 pmc_en &= ~mc_enable_perfmon_enabled_f();
3614 gk20a_writel(g, mc_enable_r(), pmc_en);
3616 pmc_en = gk20a_readl(g, mc_enable_r());
3617 pmc_en |= mc_enable_pgraph_enabled_f();
3618 pmc_en |= mc_enable_blg_enabled_f();
3619 pmc_en |= mc_enable_perfmon_enabled_f();
3620 gk20a_writel(g, mc_enable_r(), pmc_en);
3621 pmc_en = gk20a_readl(g, mc_enable_r());
3623 /* enable fifo access */
3624 gk20a_writel(g, gr_gpfifo_ctl_r(),
3625 gr_gpfifo_ctl_access_enabled_f() |
3626 gr_gpfifo_ctl_semaphore_access_enabled_f());
3628 if (!g->gr.ctx_vars.valid) {
3629 err = gr_gk20a_init_ctx_vars(g, &g->gr);
3631 nvhost_err(dev_from_gk20a(g),
3632 "fail to load gr init ctx");
3637 static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
3639 struct gr_gk20a *gr = &g->gr;
3640 struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load;
3641 unsigned long end_jiffies = jiffies +
3642 msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
3647 /* enable interrupts */
3648 gk20a_writel(g, gr_intr_r(), ~0);
3649 gk20a_writel(g, gr_intr_en_r(), ~0);
3651 /* reset ctx switch state */
3652 gr_gk20a_ctx_reset(g, 0);
3655 gk20a_writel(g, gr_scc_init_r(),
3656 gr_scc_init_ram_trigger_f());
3658 /* load non_ctx init */
3659 for (i = 0; i < sw_non_ctx_load->count; i++)
3660 gk20a_writel(g, sw_non_ctx_load->l[i].addr,
3661 sw_non_ctx_load->l[i].value);
3663 err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
3667 err = gr_gk20a_load_ctxsw_ucode(g, gr);
3671 /* this appears to query sw state, but fecs actually inits the
3672 ramchain, etc., so this is hw init */
3673 err = gr_gk20a_init_ctx_state(g, gr);
3679 nvhost_dbg(dbg_fn | dbg_err, "fail");
3681 nvhost_dbg_fn("done");
3686 static int gk20a_init_gr_setup_sw(struct gk20a *g)
3688 struct gr_gk20a *gr = &g->gr;
3694 nvhost_dbg_fn("skip init");
3700 err = gr_gk20a_init_gr_config(g, gr);
3704 err = gr_gk20a_init_mmu_sw(g, gr);
3708 err = gr_gk20a_init_map_tiles(g, gr);
3712 if (tegra_cpu_is_asim())
3713 gr->max_comptag_mem = 1; /* MBs worth of comptag coverage */
3715 nvhost_dbg_info("total ram pages : %lu", totalram_pages);
3716 gr->max_comptag_mem = totalram_pages
3717 >> (10 - (PAGE_SHIFT - 10));
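/*
 * (10 - (PAGE_SHIFT - 10)) == 20 - PAGE_SHIFT, so this converts pages to
 * MB: with 4KB pages that is totalram_pages >> 8, i.e. 256 pages per MB.
 */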
3719 err = gr_gk20a_init_comptag(g, gr);
3723 err = gr_gk20a_init_zcull(g, gr);
3727 err = gr_gk20a_alloc_global_ctx_buffers(g);
3731 mutex_init(&gr->ctx_mutex);
3732 spin_lock_init(&gr->ch_tlb_lock);
3734 gr->remove_support = gk20a_remove_gr_support;
3735 gr->sw_ready = true;
3737 nvhost_dbg_fn("done");
3741 nvhost_dbg(dbg_fn | dbg_err, "fail");
3742 gk20a_remove_gr_support(gr);
3746 int gk20a_init_gr_support(struct gk20a *g)
3752 err = gk20a_init_gr_prepare(g);
3756 /* this is required before gr_gk20a_init_ctx_state */
3757 mutex_init(&g->gr.fecs_mutex);
3759 err = gk20a_init_gr_reset_enable_hw(g);
3763 err = gk20a_init_gr_setup_sw(g);
3767 err = gk20a_init_gr_setup_hw(g);
3774 #define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc
3775 #define NVA297_SET_CIRCULAR_BUFFER_SIZE 0x1280
3776 #define NVA297_SET_SHADER_EXCEPTIONS 0x1528
3777 #define NVA0C0_SET_SHADER_EXCEPTIONS 0x1528
3779 #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
3781 struct gr_isr_data {
3792 static void gk20a_gr_set_shader_exceptions(struct gk20a *g,
3793 struct gr_isr_data *isr_data)
3799 if (isr_data->data_lo ==
3800 NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE)
3806 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
3809 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
3813 static void gk20a_gr_set_circular_buffer_size(struct gk20a *g,
3814 struct gr_isr_data *isr_data)
3816 struct gr_gk20a *gr = &g->gr;
3817 u32 gpc_index, ppc_index, stride, val, offset;
3818 u32 cb_size = isr_data->data_lo * 4;
3822 if (cb_size > gr->attrib_cb_size)
3823 cb_size = gr->attrib_cb_size;
3825 gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
3826 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
3827 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
3828 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
3830 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3831 stride = proj_gpc_stride_v() * gpc_index;
3833 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
3836 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() +
3838 proj_ppc_in_gpc_stride_v() * ppc_index);
3840 offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val);
3842 val = set_field(val,
3843 gr_gpc0_ppc0_cbm_cfg_size_m(),
3844 gr_gpc0_ppc0_cbm_cfg_size_f(cb_size *
3845 gr->pes_tpc_count[ppc_index][gpc_index]));
3846 val = set_field(val,
3847 gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
3850 gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
3852 proj_ppc_in_gpc_stride_v() * ppc_index, val);
3854 val = set_field(val,
3855 gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
3858 gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
3860 proj_ppc_in_gpc_stride_v() * ppc_index, val);
3865 static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g,
3866 struct gr_isr_data *isr_data)
3868 struct gr_gk20a *gr = &g->gr;
3869 u32 gpc_index, ppc_index, stride, val;
3870 u32 pd_ab_max_output;
3871 u32 alpha_cb_size = isr_data->data_lo * 4;
3874 /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
3877 if (alpha_cb_size > gr->alpha_cb_size)
3878 alpha_cb_size = gr->alpha_cb_size;
3880 gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
3881 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
3882 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
3883 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
3885 pd_ab_max_output = alpha_cb_size *
3886 gr_gpc0_ppc0_cbm_cfg_size_granularity_v() /
3887 gr_pd_ab_dist_cfg1_max_output_granularity_v();
3889 gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
3890 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));
3892 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3893 stride = proj_gpc_stride_v() * gpc_index;
3895 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
3898 val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() +
3900 proj_ppc_in_gpc_stride_v() * ppc_index);
3902 val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(),
3903 gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size *
3904 gr->pes_tpc_count[ppc_index][gpc_index]));
3906 gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() +
3908 proj_ppc_in_gpc_stride_v() * ppc_index, val);
3913 void gk20a_gr_reset(struct gk20a *g)
3916 err = gk20a_init_gr_prepare(g);
3918 err = gk20a_init_gr_reset_enable_hw(g);
3920 err = gk20a_init_gr_setup_hw(g);
3924 static void gk20a_gr_nop_method(struct gk20a *g)
3926 /* Reset method in PBDMA 0 */
3927 gk20a_writel(g, pbdma_method0_r(0),
3928 pbdma_udma_nop_r());
3929 gk20a_writel(g, pbdma_data0_r(0), 0);
3932 static int gk20a_gr_handle_illegal_method(struct gk20a *g,
3933 struct gr_isr_data *isr_data)
3937 if (isr_data->class_num == KEPLER_COMPUTE_A) {
3938 switch (isr_data->offset << 2) {
3939 case NVA0C0_SET_SHADER_EXCEPTIONS:
3940 gk20a_gr_set_shader_exceptions(g, isr_data);
3947 if (isr_data->class_num == KEPLER_C) {
3948 switch (isr_data->offset << 2) {
3949 case NVA297_SET_SHADER_EXCEPTIONS:
3950 gk20a_gr_set_shader_exceptions(g, isr_data);
3952 case NVA297_SET_CIRCULAR_BUFFER_SIZE:
3953 gk20a_gr_set_circular_buffer_size(g, isr_data);
3955 case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
3956 gk20a_gr_set_alpha_circular_buffer_size(g, isr_data);
3966 gk20a_gr_nop_method(g);
3967 nvhost_err(dev_from_gk20a(g), "invalid method class 0x%08x"
3968 ", offset 0x%08x address 0x%08x\n",
3969 isr_data->class_num, isr_data->offset, isr_data->addr);
3973 static int gk20a_gr_handle_illegal_class(struct gk20a *g,
3974 struct gr_isr_data *isr_data)
3980 gk20a_gr_nop_method(g);
3981 nvhost_err(dev_from_gk20a(g),
3982 "invalid class 0x%08x, offset 0x%08x",
3983 isr_data->class_num, isr_data->offset);
3987 static int gk20a_gr_handle_class_error(struct gk20a *g,
3988 struct gr_isr_data *isr_data)
3994 gk20a_gr_nop_method(g);
3995 nvhost_err(dev_from_gk20a(g),
3996 "class error 0x%08x, offset 0x%08x",
3997 isr_data->class_num, isr_data->offset);
4001 static int gk20a_gr_handle_notify_pending(struct gk20a *g,
4002 struct gr_isr_data *isr_data)
4004 struct fifo_gk20a *f = &g->fifo;
4005 struct channel_gk20a *ch = &f->channel[isr_data->chid];
4007 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
4008 void *virtual_address;
4013 struct share_buffer_head *sh_hdr;
4017 struct gk20a_cyclestate_buffer_elem *op_elem;
4018 /* GL will never use payload 0 for cycle state */
4019 if ((ch->cyclestate.cyclestate_buffer == NULL) || (isr_data->data_lo == 0))
4022 mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
4024 virtual_address = ch->cyclestate.cyclestate_buffer;
4025 buffer_size = ch->cyclestate.cyclestate_buffer_size;
4026 offset = isr_data->data_lo;
4029 if (offset >= buffer_size) {
4034 sh_hdr = (struct share_buffer_head *)
4035 ((char *)virtual_address + offset);
4037 if (sh_hdr->size < sizeof(struct share_buffer_head)) {
4041 new_offset = offset + sh_hdr->size;
4043 switch (sh_hdr->operation) {
4052 (struct gk20a_cyclestate_buffer_elem *)
4054 if (op_elem->offset_bar0 <
4055 TEGRA_GK20A_BAR0_SIZE) {
4058 (op_elem->last_bit + 1))
4060 op_elem->first_bit)-1);
4064 op_elem->offset_bar0);
4066 switch (sh_hdr->operation) {
4069 (raw_reg & mask_orig)
4070 >> op_elem->first_bit;
4075 if ((unsigned int)mask_orig !=
4078 (raw_reg & ~mask_orig);
4081 v |= ((op_elem->data
4082 << op_elem->first_bit)
4086 op_elem->offset_bar0,
4094 sh_hdr->failed = true;
4100 /* no operation content case */
4104 sh_hdr->completed = true;
4105 offset = new_offset;
4107 mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
4110 wake_up(&ch->notifier_wq);
4114 /* Used by sw interrupt thread to translate current ctx to chid.
4115 * For performance, we don't want to go through 128 channels every time.
4116 * A small tlb is used here to cache translation */
static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
{
	struct fifo_gk20a *f = &g->fifo;
	struct gr_gk20a *gr = &g->gr;
	u32 chid = -1;
	u32 i;
	struct scatterlist *ctx_sg;

	spin_lock(&gr->ch_tlb_lock);

	/* check cache first */
	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
		if (gr->chid_tlb[i].curr_ctx == curr_ctx) {
			chid = gr->chid_tlb[i].hw_chid;
			goto unlock;
		}
	}

	/* slow path: compare the ctx pointer against each in-use channel's
	 * instance block base */
	for (chid = 0; chid < f->num_channels; chid++)
		if (f->channel[chid].in_use) {
			ctx_sg = f->channel[chid].inst_block.mem.sgt->sgl;
			if ((u32)(sg_phys(ctx_sg) >> ram_in_base_shift_v()) ==
			    gr_fecs_current_ctx_ptr_v(curr_ctx))
				break;
		}

	if (chid >= f->num_channels) {
		chid = -1;
		goto unlock;
	}

	/* add to a free tlb entry */
	for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) {
		if (gr->chid_tlb[i].curr_ctx == 0) {
			gr->chid_tlb[i].curr_ctx = curr_ctx;
			gr->chid_tlb[i].hw_chid = chid;
			goto unlock;
		}
	}

	/* no free entry, flush one */
	gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
	gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid;

	gr->channel_tlb_flush_index =
		(gr->channel_tlb_flush_index + 1) &
		(GR_CHANNEL_MAP_TLB_SIZE - 1);

unlock:
	spin_unlock(&gr->ch_tlb_lock);
	return chid;
}

static struct channel_gk20a *
channel_from_hw_chid(struct gk20a *g, u32 hw_chid)
{
	return g->fifo.channel + hw_chid;
}
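
/*
 * gr interrupt flow: fifo access to gr is stalled while the trapped
 * method (address, data, subchannel, class) and current ctx are latched,
 * the owning channel is looked up, and each pending condition is handled
 * and acked individually. Handled bits are cleared from the local copy of
 * gr_intr so anything still set at the end can be reported as unhandled;
 * fifo access is restored on exit.
 */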
void gk20a_gr_isr(struct gk20a *g)
{
	struct gr_isr_data isr_data;
	u32 grfifo_ctl;
	u32 obj_table;
	int ret = 0;
	u32 gr_intr = gk20a_readl(g, gr_intr_r());

	nvhost_dbg_fn("");

	if (!gr_intr)
		return;

	grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
	grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
	grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);

	gk20a_writel(g, gr_gpfifo_ctl_r(),
		grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
		gr_gpfifo_ctl_semaphore_access_f(0));

	isr_data.addr = gk20a_readl(g, gr_trapped_addr_r());
	isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r());
	isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r());
	isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
	isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr);
	isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr);
	obj_table = gk20a_readl(g,
		gr_fe_object_table_r(isr_data.sub_chan));
	isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);

	isr_data.chid =
		gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx);
	if (isr_data.chid == -1) {
		nvhost_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
			isr_data.curr_ctx);
		goto clean_up;
	}

	nvhost_dbg(dbg_intr, "channel %d: addr 0x%08x, "
		"data 0x%08x 0x%08x, "
		"ctx 0x%08x, offset 0x%08x, "
		"subchannel 0x%08x, class 0x%08x",
		isr_data.chid, isr_data.addr,
		isr_data.data_hi, isr_data.data_lo,
		isr_data.curr_ctx, isr_data.offset,
		isr_data.sub_chan, isr_data.class_num);

	if (gr_intr & gr_intr_notify_pending_f()) {
		gk20a_gr_handle_notify_pending(g, &isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_notify_reset_f());
		gr_intr &= ~gr_intr_notify_pending_f();
	}

	if (gr_intr & gr_intr_illegal_method_pending_f()) {
		ret = gk20a_gr_handle_illegal_method(g, &isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_illegal_method_reset_f());
		gr_intr &= ~gr_intr_illegal_method_pending_f();
	}

	if (gr_intr & gr_intr_illegal_class_pending_f()) {
		ret = gk20a_gr_handle_illegal_class(g, &isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_illegal_class_reset_f());
		gr_intr &= ~gr_intr_illegal_class_pending_f();
	}

	if (gr_intr & gr_intr_class_error_pending_f()) {
		ret = gk20a_gr_handle_class_error(g, &isr_data);
		gk20a_writel(g, gr_intr_r(),
			gr_intr_class_error_reset_f());
		gr_intr &= ~gr_intr_class_error_pending_f();
	}

	if (gr_intr & gr_intr_exception_pending_f()) {
		u32 exception = gk20a_readl(g, gr_exception_r());

		nvhost_dbg(dbg_intr, "exception %08x\n", exception);

		if (exception & gr_exception_fe_m()) {
			u32 fe = gk20a_readl(g, gr_fe_hww_esr_r());
			nvhost_dbg(dbg_intr, "fe warning %08x\n", fe);
			gk20a_writel(g, gr_fe_hww_esr_r(), fe);
		}
		gk20a_writel(g, gr_intr_r(),
			gr_intr_exception_reset_f());
		gr_intr &= ~gr_intr_exception_pending_f();
	}

	/* a handler failed: tear down the faulting channel */
	if (ret) {
		struct channel_gk20a *fault_ch =
			channel_from_hw_chid(g, isr_data.chid);
		if (fault_ch && fault_ch->hwctx)
			gk20a_free_channel(fault_ch->hwctx, false);
	}

clean_up:
	gk20a_writel(g, gr_gpfifo_ctl_r(),
		grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
		gr_gpfifo_ctl_semaphore_access_f(1));

	if (gr_intr)
		nvhost_err(dev_from_gk20a(g),
			"unhandled gr interrupt 0x%08x", gr_intr);
}
int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
{
	BUG_ON(size == NULL);
	return gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 1,
		gr_fecs_method_push_adr_discover_reglist_image_size_v(),
		size, GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
}

int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
{
	return gr_gk20a_submit_fecs_method(g, 4,
		gr_fecs_current_ctx_ptr_f(addr >> 12) |
		gr_fecs_current_ctx_valid_f(1) |
		gr_fecs_current_ctx_target_vid_mem_f(),
		~0, 1, gr_fecs_method_push_adr_set_reglist_bind_instance_f(),
		0, GR_IS_UCODE_OP_EQUAL, 1, GR_IS_UCODE_OP_SKIP, 0);
}

/* sic: "virual" is kept as-is to match the prototype used by callers */
int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va)
{
	return gr_gk20a_submit_fecs_method(g, 4, u64_lo32(pmu_va >> 8),
		~0, 1, gr_fecs_method_push_adr_set_reglist_virtual_address_f(),
		0, GR_IS_UCODE_OP_EQUAL, 1, GR_IS_UCODE_OP_SKIP, 0);
}
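
/*
 * Suspend quiesces gr: wait for the engine to go idle, disable fifo
 * access, mask the interrupt and all exception registers, and flush the
 * ctx-to-chid TLB so no stale translations survive resume.
 */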
int gk20a_gr_suspend(struct gk20a *g)
{
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	int ret;

	nvhost_dbg_fn("");

	ret = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
	if (ret)
		return ret;

	gk20a_writel(g, gr_gpfifo_ctl_r(),
		gr_gpfifo_ctl_access_disabled_f());

	/* disable gr intr */
	gk20a_writel(g, gr_intr_r(), 0);
	gk20a_writel(g, gr_intr_en_r(), 0);

	/* disable all exceptions */
	gk20a_writel(g, gr_exception_r(), 0);
	gk20a_writel(g, gr_exception_en_r(), 0);
	gk20a_writel(g, gr_exception1_r(), 0);
	gk20a_writel(g, gr_exception1_en_r(), 0);
	gk20a_writel(g, gr_exception2_r(), 0);
	gk20a_writel(g, gr_exception2_en_r(), 0);

	gk20a_gr_flush_channel_tlb(&g->gr);

	nvhost_dbg_fn("done");
	return ret;
}