drivers: video: host: check tegra revision at runtime
linux-3.10.git: drivers/video/tegra/host/gk20a/gr_gk20a.c
1 /*
2  * drivers/video/tegra/host/gk20a/gr_gk20a.c
3  *
4  * GK20A Graphics
5  *
6  * Copyright (c) 2011, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>        /* for udelay */
23 #include <linux/mm.h>           /* for totalram_pages */
24 #include <linux/nvmap.h>
25
26 #include "mach/hardware.h"
27
28 #include "../dev.h"
29
30 #include "gk20a.h"
31 #include "gr_ctx_gk20a.h"
32
33 #include "hw_ccsr_gk20a.h"
34 #include "hw_ctxsw_prog_gk20a.h"
35 #include "hw_gr_gk20a.h"
36 #include "hw_mc_gk20a.h"
37 #include "hw_ram_gk20a.h"
38 #include "hw_pri_ringmaster_gk20a.h"
39 #include "hw_proj_gk20a.h"
40 #include "hw_top_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42 #include "hw_fb_gk20a.h"
43 #include "hw_therm_gk20a.h"
44 #include "gk20a_gating_reglist.h"
45 #include "chip_support.h"
46 #include "nvhost_memmgr.h"
47
48 static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
49 static int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_gk20a *c,
50                                     u32 addr, u32 data, u32 patch);
51
52 /* global ctx buffer */
53 static int  gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
54 static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g);
55 static int  gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
56                                             struct channel_gk20a *c);
57 static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c);
58
59 /* channel gr ctx buffer */
60 static int  gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
61                                         struct channel_gk20a *c);
62 static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);
63
64 /* channel patch ctx buffer */
65 static int  gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
66                                         struct channel_gk20a *c);
67 static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);
68
69 /* golden ctx image */
70 static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
71                                           struct channel_gk20a *c);
72 static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
73                                           struct channel_gk20a *c);
74
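/*
 * Load the FECS and GPCCS ucode data segments into falcon DMEM through the
 * auto-incrementing DMEMC/DMEMD port pair (offset 0, block 0, aincw set).
 * Note: the running checksum is accumulated here but, in this version, is
 * never compared against an expected value.
 */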
75 static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
76 {
77         u32 i, ucode_u32_size;
78         const u32 *ucode_u32_data;
79         u32 checksum;
80
81         nvhost_dbg_fn("");
82
83         gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
84                                               gr_gpccs_dmemc_blk_f(0)  |
85                                               gr_gpccs_dmemc_aincw_f(1)));
86
87         ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
88         ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
89
90         for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
91                 gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
92                 checksum += ucode_u32_data[i];
93         }
94
95         gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
96                                              gr_fecs_dmemc_blk_f(0)  |
97                                              gr_fecs_dmemc_aincw_f(1)));
98
99         ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
100         ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
101
102         for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
103                 gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
104                 checksum += ucode_u32_data[i];
105         }
106         nvhost_dbg_fn("done");
107 }
108
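/*
 * Load the ucode instruction segments into falcon IMEM.  IMEM is tagged in
 * 256-byte blocks: a new tag is written after every 64 word writes, and the
 * tail of each image is zero-padded out to the next 256-byte boundary plus
 * one extra block, presumably so the final tag maps fully initialized memory.
 */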
109 static void gr_gk20a_load_falcon_imem(struct gk20a *g)
110 {
111         u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
112         const u32 *ucode_u32_data;
113         u32 tag, i, pad_start, pad_end;
114         u32 checksum;
115
116         nvhost_dbg_fn("");
117
118         cfg = gk20a_readl(g, gr_fecs_cfg_r());
119         fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
120
121         cfg = gk20a_readl(g, gr_gpc0_cfg_r());
122         gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
123
124         /* Use the broadcast address to access all of the GPCCS units. */
125         gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
126                                               gr_gpccs_imemc_blk_f(0) |
127                                               gr_gpccs_imemc_aincw_f(1)));
128
129         /* Setup the tags for the instruction memory. */
130         tag = 0;
131         gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
132
133         ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
134         ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
135
136         for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
137                 if (i && ((i % (256/sizeof(u32))) == 0)) {
138                         tag++;
139                         gk20a_writel(g, gr_gpccs_imemt_r(0),
140                                       gr_gpccs_imemt_tag_f(tag));
141                 }
142                 gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
143                 checksum += ucode_u32_data[i];
144         }
145
146         pad_start = i*4;
147         pad_end = pad_start+(256-pad_start%256)+256;
148         for (i = pad_start;
149              (i < gpccs_imem_size * 256) && (i < pad_end);
150              i += 4) {
151                 if (i && ((i % 256) == 0)) {
152                         tag++;
153                         gk20a_writel(g, gr_gpccs_imemt_r(0),
154                                       gr_gpccs_imemt_tag_f(tag));
155                 }
156                 gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
157         }
158
159         gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
160                                              gr_fecs_imemc_blk_f(0) |
161                                              gr_fecs_imemc_aincw_f(1)));
162
163         /* Setup the tags for the instruction memory. */
164         tag = 0;
165         gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
166
167         ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
168         ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
169
170         for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
171                 if (i && ((i % (256/sizeof(u32))) == 0)) {
172                         tag++;
173                         gk20a_writel(g, gr_fecs_imemt_r(0),
174                                       gr_fecs_imemt_tag_f(tag));
175                 }
176                 gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
177                 checksum += ucode_u32_data[i];
178         }
179
180         pad_start = i*4;
181         pad_end = pad_start+(256-pad_start%256)+256;
182         for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) {
183                 if (i && ((i % 256) == 0)) {
184                         tag++;
185                         gk20a_writel(g, gr_fecs_imemt_r(0),
186                                       gr_fecs_imemt_tag_f(tag));
187                 }
188                 gk20a_writel(g, gr_fecs_imemd_r(0), 0);
189         }
190 }
191
192 #define GR_IDLE_TIMEOUT_DEFAULT 10000   /* 10 milliseconds */
193
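/*
 * Poll for GR idle.  *timeout is in microseconds and is decremented by the
 * poll period each iteration; the engine also counts as idle when PGRAPH is
 * disabled in mc_enable.  ctxsw_active is never set in this version, so only
 * the engine status register is actually checked.
 */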
194 static int gr_gk20a_wait_idle(struct gk20a *g, u32 *timeout)
195 {
196 #define GR_ENGINE_INDEX         0
197 #define GR_IDLE_CHECK_PERIOD    10              /* 10 usec */
198
199         u32 gr_engine_status;
200         u32 gr_status;
201         bool ctxsw_active = false;
202
203         nvhost_dbg_fn("");
204
205         do {
206                 u32 check = min_t(u32, GR_IDLE_CHECK_PERIOD, *timeout);
207
208                 /* fmodel: host gets fifo_engine_status(gr) from gr
209                    only when gr_status is read */
210                 gr_status = gk20a_readl(g, gr_status_r());
211
212                 gr_engine_status = gk20a_readl(g, gr_engine_status_r());
213
214                 if (!(gk20a_readl(g, mc_enable_r()) &
215                       mc_enable_pgraph_enabled_f()) ||
216                     (gr_engine_status_value_v(gr_engine_status) ==
217                      gr_engine_status_value_idle_v() &&
218                      !ctxsw_active)) {
219                         nvhost_dbg_fn("done");
220                         return 0;
221                 }
222
223                 udelay(GR_IDLE_CHECK_PERIOD);
224
225                 /* handle interrupts */
226
227                 *timeout -= check;
228
229         } while (*timeout);
230
231         nvhost_err(dev_from_gk20a(g), "timeout, status: %d",
232                    gr_engine_status);
233
234         return -1;
235 }
236
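/*
 * Pulse the FECS context-switch reset controls: the first write (or the
 * caller-supplied rst_mask) asserts the sys/gpc/be context resets while
 * leaving halt and engine reset deasserted, and the second write deasserts
 * them again.  The dummy readbacks in between provide the ">10 nvclks"
 * settling delay noted in the comments.
 */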
237 static int gr_gk20a_ctx_reset(struct gk20a *g, u32 rst_mask)
238 {
239         nvhost_dbg_fn("");
240         /* FE_PWR_MODE_MODE_FORCE_ON for RTLSim and Emulation? */
241
242         if (rst_mask) {
243                 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), rst_mask);
244         } else {
245                 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
246                              gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
247                              gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
248                              gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f()  |
249                              gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
250                              gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
251                              gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f()  |
252                              gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
253                              gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
254                              gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());
255         }
256
257         /* Delay for > 10 nvclks after writing reset. */
258         gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
259
260         gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
261                      gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
262                      gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
263                      gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f()  |
264                      gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
265                      gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
266                      gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f()  |
267                      gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
268                      gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
269                      gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());
270
271         /* Delay for > 10 nvclks after writing reset. */
272         gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
273
274         /* FE_PWR_MODE_MODE_AUTO for RTLSim and Emulation? */
275
276         return 0;
277 }
278
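/*
 * Spin on a FECS ctxsw mailbox until the ucode signals a result.  Each
 * iteration reads the mailbox and evaluates it twice: opc_success against
 * mailbox_ok and opc_fail against mailbox_fail, where the GR_IS_UCODE_OP_*
 * opcodes select ==, !=, bitwise-and, <, <= or skip.  Returns 0 on success,
 * -1 on timeout or when a fail condition matches.
 */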
279 static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
280                                    u32 *mailbox_ret, u32 opc_success,
281                                    u32 mailbox_ok, u32 opc_fail,
282                                    u32 mailbox_fail)
283 {
284         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
285         u32 check = WAIT_UCODE_LOOP;
286         u32 reg;
287
288         nvhost_dbg_fn("");
289
290         while (check == WAIT_UCODE_LOOP) {
291                 if (timeout == 0)
292                         check = WAIT_UCODE_TIMEOUT;
293
294                 /* XXX when this register read was sped up by removing printks
295                  * (in sim) we had to increase GR_IDLE_TIMEOUT_DEFAULT in order
296                  * not to get spurious timeouts... that says to me udelay is
297                  * not doing what we think below...? */
298                 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id));
299
300                 if (mailbox_ret)
301                         *mailbox_ret = reg;
302
303                 switch (opc_success) {
304                 case GR_IS_UCODE_OP_EQUAL:
305                         if (reg == mailbox_ok)
306                                 check = WAIT_UCODE_OK;
307                         break;
308                 case GR_IS_UCODE_OP_NOT_EQUAL:
309                         if (reg != mailbox_ok)
310                                 check = WAIT_UCODE_OK;
311                         break;
312                 case GR_IS_UCODE_OP_AND:
313                         if (reg & mailbox_ok)
314                                 check = WAIT_UCODE_OK;
315                         break;
316                 case GR_IS_UCODE_OP_LESSER:
317                         if (reg < mailbox_ok)
318                                 check = WAIT_UCODE_OK;
319                         break;
320                 case GR_IS_UCODE_OP_LESSER_EQUAL:
321                         if (reg <= mailbox_ok)
322                                 check = WAIT_UCODE_OK;
323                         break;
324                 case GR_IS_UCODE_OP_SKIP:
325                         /* do no success check */
326                         break;
327                 default:
328                         nvhost_err(dev_from_gk20a(g),
329                                    "invalid success opcode 0x%x", opc_success);
330
331                         check = WAIT_UCODE_ERROR;
332                         break;
333                 }
334
335                 switch (opc_fail) {
336                 case GR_IS_UCODE_OP_EQUAL:
337                         if (reg == mailbox_fail)
338                                 check = WAIT_UCODE_ERROR;
339                         break;
340                 case GR_IS_UCODE_OP_NOT_EQUAL:
341                         if (reg != mailbox_fail)
342                                 check = WAIT_UCODE_ERROR;
343                         break;
344                 case GR_IS_UCODE_OP_AND:
345                         if (reg & mailbox_fail)
346                                 check = WAIT_UCODE_ERROR;
347                         break;
348                 case GR_IS_UCODE_OP_LESSER:
349                         if (reg < mailbox_fail)
350                                 check = WAIT_UCODE_ERROR;
351                         break;
352                 case GR_IS_UCODE_OP_LESSER_EQUAL:
353                         if (reg <= mailbox_fail)
354                                 check = WAIT_UCODE_ERROR;
355                         break;
356                 case GR_IS_UCODE_OP_SKIP:
357                         /* do no check on fail */
358                         break;
359                 default:
360                         nvhost_err(dev_from_gk20a(g),
361                                    "invalid fail opcode 0x%x", opc_fail);
362                         check = WAIT_UCODE_ERROR;
363                         break;
364                 }
365
366                 udelay(10);
367                 timeout -= min_t(u32, GR_IDLE_CHECK_PERIOD, timeout);
368         }
369
370         if (check == WAIT_UCODE_TIMEOUT) {
371                 nvhost_err(dev_from_gk20a(g),
372                            "timeout waiting on ucode response");
373                 return -1;
374         } else if (check == WAIT_UCODE_ERROR) {
375                 nvhost_err(dev_from_gk20a(g),
376                            "ucode method failed on mailbox=%d value=0x%08x",
377                            mailbox_id, reg);
378                 return -1;
379         }
380
381         nvhost_dbg_fn("done");
382         return 0;
383 }
384
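/*
 * Issue a FECS method: optionally preload mailbox mb_id with mb_data, clear
 * the requested bits in mailbox 0, write the method data and push registers,
 * then wait on mailbox 0 with the given success/fail conditions via
 * gr_gk20a_ctx_wait_ucode().
 */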
385 int gr_gk20a_submit_fecs_method(struct gk20a *g,
386                         u32 mb_id, u32 mb_data, u32 mb_clr,
387                         u32 mtd_data, u32 mtd_adr, u32 *mb_ret,
388                         u32 opc_ok, u32 mb_ok, u32 opc_fail, u32 mb_fail)
389 {
390         if (mb_id != 0)
391                 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(mb_id),
392                         mb_data);
393
394         gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
395                 gr_fecs_ctxsw_mailbox_clear_value_f(mb_clr));
396
397         gk20a_writel(g, gr_fecs_method_data_r(), mtd_data);
398         gk20a_writel(g, gr_fecs_method_push_r(),
399                 gr_fecs_method_push_adr_f(mtd_adr));
400
401         return gr_gk20a_ctx_wait_ucode(g, 0, mb_ret,
402                 opc_ok, mb_ok, opc_fail, mb_fail);
403 }
404
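/*
 * Write the graphics context pointer into the channel's instance block.
 * The context GPU VA is split into a 4KB-aligned low word (gpu_va >> 12)
 * and a high word, stored in the ram_in_gr_wfi fields with the virtual
 * addressing mode selected.
 */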
405 static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
406 {
407         u32 addr_lo;
408         u32 addr_hi;
409         int ret = 0;
410         void *inst_ptr = NULL;
411
412         nvhost_dbg_fn("");
413
414         inst_ptr = mem_op().mmap(c->inst_block.mem.ref);
415         if (IS_ERR(inst_ptr)) {
416                 ret = -ENOMEM;
417                 goto clean_up;
418         }
419
420         addr_lo = u64_lo32(gpu_va) >> 12;
421         addr_hi = u64_hi32(gpu_va);
422
423         mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(),
424                  ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
425                  ram_in_gr_wfi_ptr_lo_f(addr_lo));
426
427         mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(),
428                  ram_in_gr_wfi_ptr_hi_f(addr_hi));
429
430         mem_op().munmap(c->inst_block.mem.ref, inst_ptr);
431
432         return 0;
433
434 clean_up:
435         if (!IS_ERR_OR_NULL(inst_ptr))
436                 mem_op().munmap(c->inst_block.mem.ref, inst_ptr);
437
438         return ret;
439 }
440
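/*
 * Either write a register directly (patch == 0) or append an (addr, data)
 * pair to the channel's patch context buffer -- two u32 slots per entry,
 * with data_count tracking how many entries there are; these are presumably
 * replayed by the ctxsw ucode when the context is restored.
 */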
441 static int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_gk20a *c,
442                                     u32 addr, u32 data, u32 patch)
443 {
444         struct channel_ctx_gk20a *ch_ctx;
445         u32 patch_slot = 0;
446         void *patch_ptr = NULL;
447
448         nvhost_dbg_fn("");
449
450         BUG_ON(patch != 0 && c == NULL);
451
452         if (patch) {
453                 ch_ctx = &c->ch_ctx;
454                 patch_ptr = mem_op().mmap(ch_ctx->patch_ctx.mem.ref);
455                 if (IS_ERR(patch_ptr))
456                         return -ENOMEM;
457
458                 patch_slot = ch_ctx->patch_ctx.data_count * 2;
459
460                 mem_wr32(patch_ptr, patch_slot++, addr);
461                 mem_wr32(patch_ptr, patch_slot++, data);
462
463                 mem_op().munmap(ch_ctx->patch_ctx.mem.ref, patch_ptr);
464                 ch_ctx->patch_ctx.data_count++;
465         } else {
466                 gk20a_writel(g, addr, data);
467         }
468
469         return 0;
470 }
471
472 static int gr_gk20a_ctx_bind_first_channel(struct gk20a *g,
473                                         struct channel_gk20a *c)
474 {
475         u32 inst_base_ptr =
476                 u64_lo32(c->inst_block.cpu_pa) >> ram_in_base_shift_v();
477         u32 ret;
478
479         nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
480                    c->hw_chid, inst_base_ptr);
481
482         ret = gr_gk20a_submit_fecs_method(g, 0, 0, 0x30,
483                         gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
484                         gr_fecs_current_ctx_target_vid_mem_f() |
485                         gr_fecs_current_ctx_valid_f(1),
486                         gr_fecs_method_push_adr_bind_pointer_f(),
487                         0, GR_IS_UCODE_OP_AND, 0x10, GR_IS_UCODE_OP_AND, 0x20);
488         if (ret)
489                 nvhost_err(dev_from_gk20a(g),
490                         "bind channel instance failed");
491
492         return ret;
493 }
494
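/*
 * Program the zcull mode and buffer pointer into the channel's context
 * image.  The 40-bit zcull GPU VA is packed into a single word as bits
 * 39:8 (i.e. a 256-byte aligned pointer).  GR engine activity can optionally
 * be idled around the update via disable_fifo.
 */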
495 static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
496                                     bool disable_fifo)
497 {
498         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
499         struct fifo_gk20a *f = &g->fifo;
500         struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
501         u32 va_lo, va_hi, va;
502         int ret = 0;
503         void *ctx_ptr = NULL;
504
505         nvhost_dbg_fn("");
506
507         ctx_ptr = mem_op().mmap(ch_ctx->gr_ctx.mem.ref);
508         if (IS_ERR(ctx_ptr))
509                 return -ENOMEM;
510
511         if (ch_ctx->zcull_ctx.gpu_va == 0 &&
512             ch_ctx->zcull_ctx.ctx_sw_mode ==
513                 ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
514                 ret = -EINVAL;
515                 goto clean_up;
516         }
517
518         va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
519         va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
520         va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
521
522         if (disable_fifo) {
523                 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
524                 if (ret) {
525                         nvhost_err(dev_from_gk20a(g),
526                                 "failed to disable gr engine activity\n");
527                         goto clean_up;
528                 }
529         }
530
531         mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_v(), 0,
532                  ch_ctx->zcull_ctx.ctx_sw_mode);
533
534         mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_v(), 0, va);
535
536         if (disable_fifo) {
537                 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
538                 if (ret) {
539                         nvhost_err(dev_from_gk20a(g),
540                                 "failed to enable gr engine activity\n");
541                         goto clean_up;
542                 }
543         }
544
545 clean_up:
546         mem_op().munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);
547
548         return ret;
549 }
550
551 static int gr_gk20a_ctx_pm_setup(struct gk20a *g, struct channel_gk20a *c,
552                                  bool disable_fifo)
553 {
554         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
555         u32 va_lo, va_hi, va;
556         int ret = 0;
557         void *ctx_ptr = NULL;
558
559         nvhost_dbg_fn("");
560
561         ctx_ptr = mem_op().mmap(ch_ctx->gr_ctx.mem.ref);
562         if (IS_ERR(ctx_ptr))
563                 return -ENOMEM;
564
565         if (ch_ctx->pm_ctx.ctx_sw_mode ==
566             ctxsw_prog_main_image_pm_mode_ctxsw_v()) {
567
568                 if (ch_ctx->pm_ctx.gpu_va == 0) {
569                         ret = -ENOMEM;
570                         goto clean_up;
571                 }
572
573                 va_lo = u64_lo32(ch_ctx->pm_ctx.gpu_va);
574                 va_hi = u64_hi32(ch_ctx->pm_ctx.gpu_va);
575                 va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
576         } else {
577                 va_lo = va_hi = 0;
578                 va = 0;
579         }
580
581         /* TBD
582         if (disable_fifo)
583                 disable_engine_activity(...);
584         */
585
586         mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_v(), 0, ch_ctx->pm_ctx.ctx_sw_mode);
587         mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_v(), 0, va);
588
589         /* TBD
590         if (disable_fifo)
591                 enable_engine_activity(...);
592         */
593
594         nvhost_dbg_fn("done");
595 clean_up:
596         if (ret)
597                 nvhost_dbg_fn("fail");
598         mem_op().munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);
599
600         return ret;
601 }
602
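/*
 * Patch the beta/alpha circular-buffer layout into the context: default CB
 * sizes and the pd_ab max-output limit globally, then per-GPC/per-PPC CBM
 * config with each PES's slice of the attribute and alpha chunks (offsets
 * advance by attrib/alpha_cb_size times that PES's TPC count).
 */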
603 static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
604                         struct channel_gk20a *c, u32 patch)
605 {
606         struct gr_gk20a *gr = &g->gr;
607         u32 attrib_offset_in_chunk = 0;
608         u32 alpha_offset_in_chunk = 0;
609         u32 pd_ab_max_output;
610         u32 gpc_index, ppc_index;
611         u32 temp;
612         u32 cbm_cfg_size1, cbm_cfg_size2;
613
614         nvhost_dbg_fn("");
615
616         gr_gk20a_ctx_patch_write(g, c, gr_ds_tga_constraintlogic_r(),
617                 gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
618                 gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
619                 patch);
620
621         pd_ab_max_output = (gr->alpha_cb_default_size *
622                 gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) /
623                 gr_pd_ab_dist_cfg1_max_output_granularity_v();
624
625         gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg1_r(),
626                 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
627                 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
628
629         alpha_offset_in_chunk = attrib_offset_in_chunk +
630                 gr->tpc_count * gr->attrib_cb_size;
631
632         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
633                 temp = proj_gpc_stride_v() * gpc_index;
634                 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
635                      ppc_index++) {
636                         cbm_cfg_size1 = gr->attrib_cb_default_size *
637                                 gr->pes_tpc_count[ppc_index][gpc_index];
638                         cbm_cfg_size2 = gr->alpha_cb_default_size *
639                                 gr->pes_tpc_count[ppc_index][gpc_index];
640
641                         gr_gk20a_ctx_patch_write(g, c,
642                                 gr_gpc0_ppc0_cbm_cfg_r() + temp +
643                                 proj_ppc_in_gpc_stride_v() * ppc_index,
644                                 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) |
645                                 gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) |
646                                 gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch);
647
648                         attrib_offset_in_chunk += gr->attrib_cb_size *
649                                 gr->pes_tpc_count[ppc_index][gpc_index];
650
651                         gr_gk20a_ctx_patch_write(g, c,
652                                 gr_gpc0_ppc0_cbm_cfg2_r() + temp +
653                                 proj_ppc_in_gpc_stride_v() * ppc_index,
654                                 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) |
655                                 gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch);
656
657                         alpha_offset_in_chunk += gr->alpha_cb_size *
658                                 gr->pes_tpc_count[ppc_index][gpc_index];
659                 }
660         }
661
662         return 0;
663 }
664
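/*
 * Patch the per-channel VAs of the global context buffers (pagepool,
 * bundle CB, attribute CB) into the context.  Addresses are stored as VA
 * bits 39:8 (39:12 for the attribute CB), i.e. shifted right by the
 * hardware alignment with the high word folded into the upper bits.
 */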
665 static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
666                         struct channel_gk20a *c, u32 patch)
667 {
668         struct gr_gk20a *gr = &g->gr;
669         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
670         u64 addr;
671         u32 size;
672         u32 data;
673
674         nvhost_dbg_fn("");
675
676         /* global pagepool */
677         addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
678                 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
679                 (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
680                  (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
681
682         size = gr->global_ctx_buffer[PAGEPOOL].size /
683                 gr_scc_pagepool_total_pages_byte_granularity_v();
684
685         if (size == gr_scc_pagepool_total_pages_hwmax_value_v())
686                 size = gr_scc_pagepool_total_pages_hwmax_v();
687
688         nvhost_dbg_info("pagepool addr : 0x%016llx, size : %d",
689                 addr, size);
690
691         gr_gk20a_ctx_patch_write(g, c, gr_scc_pagepool_base_r(),
692                 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
693
694         gr_gk20a_ctx_patch_write(g, c, gr_scc_pagepool_r(),
695                 gr_scc_pagepool_total_pages_f(size) |
696                 gr_scc_pagepool_valid_true_f(), patch);
697
698         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gcc_pagepool_base_r(),
699                 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
700
701         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gcc_pagepool_r(),
702                 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
703
704         gr_gk20a_ctx_patch_write(g, c, gr_pd_pagepool_r(),
705                 gr_pd_pagepool_total_pages_f(size) |
706                 gr_pd_pagepool_valid_true_f(), patch);
707
708         /* global bundle cb */
709         addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
710                 gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
711                 (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
712                  (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));
713
714         size = gr->bundle_cb_default_size;
715
716         nvhost_dbg_info("global bundle cb addr : 0x%016llx, size : %d",
717                 addr, size);
718
719         gr_gk20a_ctx_patch_write(g, c, gr_scc_bundle_cb_base_r(),
720                 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
721
722         gr_gk20a_ctx_patch_write(g, c, gr_scc_bundle_cb_size_r(),
723                 gr_scc_bundle_cb_size_div_256b_f(size) |
724                 gr_scc_bundle_cb_size_valid_true_f(), patch);
725
726         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_setup_bundle_cb_base_r(),
727                 gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr), patch);
728
729         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_setup_bundle_cb_size_r(),
730                 gr_gpcs_setup_bundle_cb_size_div_256b_f(size) |
731                 gr_gpcs_setup_bundle_cb_size_valid_true_f(), patch);
732
733         /* data for state_limit */
734         data = (gr->bundle_cb_default_size *
735                 gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
736                 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
737
738         data = min_t(u32, data, gr->min_gpm_fifo_depth);
739
740         nvhost_dbg_info("global bundle cb token limit : %d, state limit : %d",
741                    gr->bundle_cb_token_limit, data);
742
743         gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg2_r(),
744                 gr_pd_ab_dist_cfg2_token_limit_f(gr->bundle_cb_token_limit) |
745                 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
746
747         /* global attrib cb */
748         addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
749                 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
750                 (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
751                  (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
752
753         nvhost_dbg_info("global attrib cb addr : 0x%016llx", addr);
754
755         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_setup_attrib_cb_base_r(),
756                 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
757                 gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);
758
759         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
760                 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
761                 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);
762
763         return 0;
764 }
765
766 static int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, u32 patch)
767 {
768         struct gr_gk20a *gr = &g->gr;
769         u32 gpm_pd_cfg;
770         u32 pd_ab_dist_cfg0;
771         u32 ds_debug;
772         u32 mpc_vtg_debug;
773         u32 pe_vaf;
774         u32 pe_vsc_vpc;
775
776         nvhost_dbg_fn("");
777
778         gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r());
779         pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
780         ds_debug = gk20a_readl(g, gr_ds_debug_r());
781         mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
782
783         if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
784                 pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
785                 pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
786
787                 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f() | gpm_pd_cfg;
788                 pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf;
789                 pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | pe_vsc_vpc;
790                 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | pd_ab_dist_cfg0;
791                 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
792                 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
793
794                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
795                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
796                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
797                 gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
798                 gr_gk20a_ctx_patch_write(g, c, gr_ds_debug_r(), ds_debug, patch);
799                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
800         } else {
801                 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
802                 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
803                 ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
804                 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
805
806                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
807                 gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
808                 gr_gk20a_ctx_patch_write(g, c, gr_ds_debug_r(), ds_debug, patch);
809                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
810         }
811
812         return 0;
813 }
814
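/*
 * Program the tile-to-GPC mapping tables (crstr, ppcs_wwdx and rstr2d) from
 * gr->map_tiles, plus the wwdx normalization parameters: norm_shift is
 * selected from tpc_count (larger shifts for fewer TPCs), norm_entries is
 * tpc_count << norm_shift, and coeffN_mod = 2^N mod norm_entries for
 * N = 5..11.
 */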
815 static int gr_gk20a_setup_rop_mapping(struct gk20a *g,
816                                 struct gr_gk20a *gr)
817 {
818         u32 norm_entries, norm_shift;
819         u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
820         u32 map0, map1, map2, map3, map4, map5;
821
822         if (!gr->map_tiles)
823                 return -1;
824
825         nvhost_dbg_fn("");
826
827         gk20a_writel(g, gr_crstr_map_table_cfg_r(),
828                      gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
829                      gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
830
831         map0 =  gr_crstr_gpc_map0_tile0_f(gr->map_tiles[0]) |
832                 gr_crstr_gpc_map0_tile1_f(gr->map_tiles[1]) |
833                 gr_crstr_gpc_map0_tile2_f(gr->map_tiles[2]) |
834                 gr_crstr_gpc_map0_tile3_f(gr->map_tiles[3]) |
835                 gr_crstr_gpc_map0_tile4_f(gr->map_tiles[4]) |
836                 gr_crstr_gpc_map0_tile5_f(gr->map_tiles[5]);
837
838         map1 =  gr_crstr_gpc_map1_tile6_f(gr->map_tiles[6]) |
839                 gr_crstr_gpc_map1_tile7_f(gr->map_tiles[7]) |
840                 gr_crstr_gpc_map1_tile8_f(gr->map_tiles[8]) |
841                 gr_crstr_gpc_map1_tile9_f(gr->map_tiles[9]) |
842                 gr_crstr_gpc_map1_tile10_f(gr->map_tiles[10]) |
843                 gr_crstr_gpc_map1_tile11_f(gr->map_tiles[11]);
844
845         map2 =  gr_crstr_gpc_map2_tile12_f(gr->map_tiles[12]) |
846                 gr_crstr_gpc_map2_tile13_f(gr->map_tiles[13]) |
847                 gr_crstr_gpc_map2_tile14_f(gr->map_tiles[14]) |
848                 gr_crstr_gpc_map2_tile15_f(gr->map_tiles[15]) |
849                 gr_crstr_gpc_map2_tile16_f(gr->map_tiles[16]) |
850                 gr_crstr_gpc_map2_tile17_f(gr->map_tiles[17]);
851
852         map3 =  gr_crstr_gpc_map3_tile18_f(gr->map_tiles[18]) |
853                 gr_crstr_gpc_map3_tile19_f(gr->map_tiles[19]) |
854                 gr_crstr_gpc_map3_tile20_f(gr->map_tiles[20]) |
855                 gr_crstr_gpc_map3_tile21_f(gr->map_tiles[21]) |
856                 gr_crstr_gpc_map3_tile22_f(gr->map_tiles[22]) |
857                 gr_crstr_gpc_map3_tile23_f(gr->map_tiles[23]);
858
859         map4 =  gr_crstr_gpc_map4_tile24_f(gr->map_tiles[24]) |
860                 gr_crstr_gpc_map4_tile25_f(gr->map_tiles[25]) |
861                 gr_crstr_gpc_map4_tile26_f(gr->map_tiles[26]) |
862                 gr_crstr_gpc_map4_tile27_f(gr->map_tiles[27]) |
863                 gr_crstr_gpc_map4_tile28_f(gr->map_tiles[28]) |
864                 gr_crstr_gpc_map4_tile29_f(gr->map_tiles[29]);
865
866         map5 =  gr_crstr_gpc_map5_tile30_f(gr->map_tiles[30]) |
867                 gr_crstr_gpc_map5_tile31_f(gr->map_tiles[31]) |
868                 gr_crstr_gpc_map5_tile32_f(0) |
869                 gr_crstr_gpc_map5_tile33_f(0) |
870                 gr_crstr_gpc_map5_tile34_f(0) |
871                 gr_crstr_gpc_map5_tile35_f(0);
872
873         gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
874         gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
875         gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
876         gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
877         gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
878         gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);
879
880         switch (gr->tpc_count) {
881         case 1:
882                 norm_shift = 4;
883                 break;
884         case 2:
885         case 3:
886                 norm_shift = 3;
887                 break;
888         case 4:
889         case 5:
890         case 6:
891         case 7:
892                 norm_shift = 2;
893                 break;
894         case 8:
895         case 9:
896         case 10:
897         case 11:
898         case 12:
899         case 13:
900         case 14:
901         case 15:
902                 norm_shift = 1;
903                 break;
904         default:
905                 norm_shift = 0;
906                 break;
907         }
908
909         norm_entries = gr->tpc_count << norm_shift;
910         coeff5_mod = (1 << 5) % norm_entries;
911         coeff6_mod = (1 << 6) % norm_entries;
912         coeff7_mod = (1 << 7) % norm_entries;
913         coeff8_mod = (1 << 8) % norm_entries;
914         coeff9_mod = (1 << 9) % norm_entries;
915         coeff10_mod = (1 << 10) % norm_entries;
916         coeff11_mod = (1 << 11) % norm_entries;
917
918         gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
919                      gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
920                      gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
921                      gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
922                      gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
923                      gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));
924
925         gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
926                      gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
927                      gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
928                      gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
929                      gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
930                      gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
931                      gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
932
933         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
934         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
935         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
936         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
937         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
938         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
939
940         gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
941                      gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
942                      gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));
943
944         gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
945         gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
946         gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
947         gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
948         gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
949         gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);
950
951         return 0;
952 }
953
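/*
 * Small bit-twiddling helpers: count_bits() is a Kernighan-style popcount
 * (clearing the lowest set bit per iteration), and clear_count_bits()
 * strips the lowest 'clear_count' set bits from a mask.
 */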
954 static inline u32 count_bits(u32 mask)
955 {
956         u32 temp = mask;
957         u32 count;
958         for (count = 0; temp != 0; count++)
959                 temp &= temp - 1;
960
961         return count;
962 }
963
964 static inline u32 clear_count_bits(u32 num, u32 clear_count)
965 {
966         u32 count = clear_count;
967         for (; (num != 0) && (count != 0); count--)
968                 num &= num - 1;
969
970         return num;
971 }
972
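/*
 * Build the 32-row alpha/beta ratio tables.  For each row a target alpha
 * TPC count (roughly tpc_count * row / rows) is split across the GPCs'
 * PES TPC masks, alternating which side takes the remainder, and the
 * resulting per-GPC masks are packed four GPCs per register.
 */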
973 static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
974                                         struct gr_gk20a *gr)
975 {
976         u32 table_index_bits = 5;
977         u32 rows = (1 << table_index_bits);
978         u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows;
979
980         u32 row;
981         u32 index;
982         u32 gpc_index;
983         u32 gpcs_per_reg = 4;
984         u32 pes_index;
985         u32 tpc_count_pes;
986         u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
987
988         u32 alpha_target, beta_target;
989         u32 alpha_bits, beta_bits;
990         u32 alpha_mask, beta_mask, partial_mask;
991         u32 reg_offset;
992         bool assign_alpha;
993
994         u32 map_alpha[gr_pd_alpha_ratio_table__size_1_v()];
995         u32 map_beta[gr_pd_alpha_ratio_table__size_1_v()];
996         u32 map_reg_used[gr_pd_alpha_ratio_table__size_1_v()];
997
998         nvhost_dbg_fn("");
999
1000         memset(map_alpha, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
1001         memset(map_beta, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
1002         memset(map_reg_used, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
1003
1004         for (row = 0; row < rows; ++row) {
1005                 alpha_target = max_t(u32, gr->tpc_count * row / rows, 1);
1006                 beta_target = gr->tpc_count - alpha_target;
1007
1008                 assign_alpha = (alpha_target < beta_target);
1009
1010                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
1011                         reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg);
1012                         alpha_mask = beta_mask = 0;
1013
1014                         for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) {
1015                                 tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index];
1016
1017                                 if (assign_alpha) {
1018                                         alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes;
1019                                         beta_bits = tpc_count_pes - alpha_bits;
1020                                 } else {
1021                                         beta_bits = (beta_target == 0) ? 0 : tpc_count_pes;
1022                                         alpha_bits = tpc_count_pes - beta_bits;
1023                                 }
1024
1025                                 partial_mask = gr->pes_tpc_mask[pes_index][gpc_index];
1026                                 partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits);
1027                                 alpha_mask |= partial_mask;
1028
1029                                 partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask;
1030                                 beta_mask |= partial_mask;
1031
1032                                 alpha_target -= min(alpha_bits, alpha_target);
1033                                 beta_target -= min(beta_bits, beta_target);
1034
1035                                 if ((alpha_bits > 0) || (beta_bits > 0))
1036                                         assign_alpha = !assign_alpha;
1037                         }
1038
1039                         switch (gpc_index % gpcs_per_reg) {
1040                         case 0:
1041                                 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask);
1042                                 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask);
1043                                 break;
1044                         case 1:
1045                                 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask);
1046                                 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask);
1047                                 break;
1048                         case 2:
1049                                 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask);
1050                                 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask);
1051                                 break;
1052                         case 3:
1053                                 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask);
1054                                 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask);
1055                                 break;
1056                         }
1057                         map_reg_used[reg_offset] = true;
1058                 }
1059         }
1060
1061         for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) {
1062                 if (map_reg_used[index]) {
1063                         gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]);
1064                         gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]);
1065                 }
1066         }
1067
1068         return 0;
1069 }
1070
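/*
 * Floorsweeping-dependent state: assign a global SM id to every present
 * TPC, program per-GPC active TPC counts, the PD/DS tpc-per-gpc tables and
 * the GPC skip table, set the CWD GPC/TPC totals and the ZROP/CROP active
 * FBP counts, and cap L2 max-ways-evict-last to 9 on single-FBP chips.
 */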
1071 static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
1072 {
1073         struct gr_gk20a *gr = &g->gr;
1074         u32 tpc_index, gpc_index;
1075         u32 tpc_offset, gpc_offset;
1076         u32 sm_id = 0, gpc_id = 0;
1077         u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
1078         u32 tpc_per_gpc;
1079         u32 max_ways_evict = INVALID_MAX_WAYS;
1080
1081         nvhost_dbg_fn("");
1082
1083         for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) {
1084                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
1085                         gpc_offset = proj_gpc_stride_v() * gpc_index;
1086                         if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
1087                                 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;
1088
1089                                 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
1090                                              gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
1091                                 gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset,
1092                                              gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id));
1093                                 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset,
1094                                              gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
1095                                 gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
1096                                              gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
1097
1098                                 sm_id_to_gpc_id[sm_id] = gpc_index;
1099                                 sm_id++;
1100                         }
1101
1102                         gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset,
1103                                      gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
1104                         gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset,
1105                                      gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
1106                 }
1107         }
1108
1109         for (tpc_index = 0, gpc_id = 0;
1110              tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
1111              tpc_index++, gpc_id += 8) {
1112
1113                 if (gpc_id >= gr->gpc_count)
1114                         gpc_id = 0;
1115
1116                 tpc_per_gpc =
1117                         gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) |
1118                         gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) |
1119                         gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) |
1120                         gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) |
1121                         gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) |
1122                         gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) |
1123                         gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) |
1124                         gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]);
1125
1126                 gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
1127                 gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
1128         }
1129
1130         /* grSetupPDMapping stubbed for gk20a */
1131         gr_gk20a_setup_rop_mapping(g, gr);
1132         gr_gk20a_setup_alpha_beta_tables(g, gr);
1133
1134         if (gr->num_fbps == 1)
1135                 max_ways_evict = 9;
1136
1137         if (max_ways_evict != INVALID_MAX_WAYS)
1138                 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_r(),
1139                              ((gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_r()) &
1140                                ~(ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(~0))) |
1141                               ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(max_ways_evict)));
1142
1143         for (gpc_index = 0;
1144              gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
1145              gpc_index += 4) {
1146
1147                 gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
1148                              gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) |
1149                              gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) |
1150                              gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) |
1151                              gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
1152         }
1153
1154         gk20a_writel(g, gr_cwd_fs_r(),
1155                      gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
1156                      gr_cwd_fs_num_tpcs_f(gr->tpc_count));
1157
1158         gk20a_writel(g, gr_bes_zrop_settings_r(),
1159                      gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps));
1160         gk20a_writel(g, gr_bes_crop_settings_r(),
1161                      gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps));
1162
1163         return 0;
1164 }
1165
1166 static int gr_gk20a_force_image_save(struct channel_gk20a *c, u32 save_type)
1167 {
1168         struct gk20a *g = c->g;
1169         int ret;
1170
1171         u32 inst_base_ptr =
1172                 u64_lo32(c->inst_block.cpu_pa) >> ram_in_base_shift_v();
1173
1174         nvhost_dbg_fn("");
1175
1176         ret = gr_gk20a_submit_fecs_method(g, 0, 0, 3,
1177                         gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
1178                         gr_fecs_current_ctx_target_vid_mem_f() |
1179                         gr_fecs_current_ctx_valid_f(1), save_type, 0,
1180                         GR_IS_UCODE_OP_AND, 1, GR_IS_UCODE_OP_AND, 2);
1181         if (ret)
1182                 nvhost_err(dev_from_gk20a(g), "save context image failed");
1183
1184         return ret;
1185 }
1186
1187 /* init global golden image from a fresh gr_ctx in channel ctx.
1188    save a copy in local_golden_image in ctx_vars */
1189 static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1190                                           struct channel_gk20a *c)
1191 {
1192         struct gr_gk20a *gr = &g->gr;
1193         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1194         u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
1195         u32 ctx_header_words;
1196         u32 i;
1197         u32 data;
1198         void *ctx_ptr = NULL;
1199         void *gold_ptr = NULL;
1200         int err = 0;
1201
1202         nvhost_dbg_fn("");
1203
1204         err = gr_gk20a_ctx_bind_first_channel(g, c);
1205         if (err)
1206                 goto clean_up;
1207
1208         err = gr_gk20a_commit_global_ctx_buffers(g, c, 0);
1209         if (err)
1210                 goto clean_up;
1211
1212         gold_ptr = mem_op().mmap(gr->global_ctx_buffer[GOLDEN_CTX].ref);
1213         ctx_ptr = mem_op().mmap(ch_ctx->gr_ctx.mem.ref);
1214 
1215         if (IS_ERR(gold_ptr) || IS_ERR(ctx_ptr)) {
1216                 err = -ENOMEM;
1217                 goto clean_up;
1218         }
1219
1220         ctx_header_words =  roundup(ctx_header_bytes, sizeof(u32));
1221         ctx_header_words >>= 2;
1222
1223         for (i = 0; i < ctx_header_words; i++) {
1224                 data = mem_rd32(ctx_ptr, i);
1225                 mem_wr32(gold_ptr, i, data);
1226         }
1227
1228         mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_v(), 0,
1229                  ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
1230
1231         mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_v(), 0, 0);
1232
1233         gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1234
1235         gr_gk20a_force_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_f());
1236
1237         if (gr->ctx_vars.local_golden_image == NULL) {
1238
1239                 gr->ctx_vars.local_golden_image =
1240                         kzalloc(gr->ctx_vars.golden_image_size, GFP_KERNEL);
1241
1242                 if (gr->ctx_vars.local_golden_image == NULL) {
1243                         err = -ENOMEM;
1244                         goto clean_up;
1245                 }
1246
1247                 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
1248                         gr->ctx_vars.local_golden_image[i] =
1249                                 mem_rd32(gold_ptr, i);
1250         }
1251
1252         gr->ctx_vars.golden_image_initialized = true;
1253
1254 clean_up:
1255         if (err)
1256                 nvhost_dbg(dbg_fn | dbg_err, "fail");
1257         else
1258                 nvhost_dbg_fn("done");
1259
1260         if (!IS_ERR_OR_NULL(gold_ptr))
1261                 mem_op().munmap(gr->global_ctx_buffer[GOLDEN_CTX].ref,
1262                                 gold_ptr);
1263         if (!IS_ERR_OR_NULL(ctx_ptr))
1264                 mem_op().munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);
1265
1266         return err;
1267 }
1268
1269 /* load saved fresh copy of golden image into channel gr_ctx */
1270 static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1271                                         struct channel_gk20a *c)
1272 {
1273         struct gr_gk20a *gr = &g->gr;
1274         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1275         u32 virt_addr_lo;
1276         u32 virt_addr_hi;
1277         u32 i;
1278         int ret = 0;
1279         void *ctx_ptr = NULL;
1280
1281         nvhost_dbg_fn("");
1282
1283         if (gr->ctx_vars.local_golden_image == NULL)
1284                 return -1;
1285
1286         ctx_ptr = mem_op().mmap(ch_ctx->gr_ctx.mem.ref);
1287         if (IS_ERR(ctx_ptr))
1288                 return -ENOMEM;
1289
1290         for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
1291                 mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);
1292
1293         mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_v(), 0, 0);
1294         mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_v(), 0, 0);
1295
1296         virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
1297         virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
1298
1299         mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_v(), 0,
1300                  ch_ctx->patch_ctx.data_count);
1301         mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_v(), 0,
1302                  virt_addr_lo);
1303         mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_v(), 0,
1304                  virt_addr_hi);
1305
1306         mem_op().munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);
1307
1308         /* gr_gk20a_ctx_zcull_setup(g, c, false); */
1309         gr_gk20a_ctx_pm_setup(g, c, false);
1310
1311         if (tegra_revision == TEGRA_REVISION_SIM) {
1312                 u32 inst_base_ptr =
1313                         u64_lo32(c->inst_block.cpu_pa) >> ram_in_base_shift_v();
1314
1315                 ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0,
1316                                 gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
1317                                 gr_fecs_current_ctx_target_vid_mem_f() |
1318                                 gr_fecs_current_ctx_valid_f(1),
1319                                 gr_fecs_method_push_adr_restore_golden_f(), 0,
1320                                 GR_IS_UCODE_OP_EQUAL, gr_fecs_ctxsw_mailbox_value_pass_v(),
1321                                 GR_IS_UCODE_OP_SKIP, 0);
1322                 if (ret)
1323                         nvhost_err(dev_from_gk20a(g),
1324                                    "restore context image failed");
1325         }
1326
1327         return ret;
1328 }
1329
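/* clear the FECS ctxsw mailbox, drop the require-ctx constraint on both DMA engines and start the FECS and GPCCS falcon CPUs */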
1330 static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
1331 {
1332         nvhost_dbg_fn("");
1333
1334         gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
1335                      gr_fecs_ctxsw_mailbox_clear_value_f(~0));
1336
1337         gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
1338         gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));
1339
1340         gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
1341         gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));
1342
1343         nvhost_dbg_fn("done");
1344 }
1345
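/* load the FECS/GPCCS ucode data and instruction images, start both falcons, wait for the init handshake, then arm the ctxsw watchdog timeout */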
1346 static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr)
1347 {
1348         u32 ret;
1349
1350         nvhost_dbg_fn("");
1351
1352         if (tegra_revision == TEGRA_REVISION_SIM) {
1353                 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
1354                         gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
1355                 gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
1356                         gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
1357         }
1358
1359         gr_gk20a_load_falcon_dmem(g);
1360         gr_gk20a_load_falcon_imem(g);
1361
1362         gr_gk20a_start_falcon_ucode(g);
1363
1364         ret = gr_gk20a_ctx_wait_ucode(g, 0, 0,
1365                                       GR_IS_UCODE_OP_EQUAL,
1366                                       eUcodeHandshakeInitComplete,
1367                                       GR_IS_UCODE_OP_SKIP, 0);
1368         if (ret) {
1369                 nvhost_err(dev_from_gk20a(g), "falcon ucode init timeout");
1370                 return ret;
1371         }
1372
1373         gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
1374         gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
1375         gk20a_writel(g, gr_fecs_method_push_r(),
1376                      gr_fecs_method_push_adr_set_watchdog_timeout_f());
1377
1378         nvhost_dbg_fn("done");
1379         return 0;
1380 }
1381
1382 #define PRI_GPCCS_ADDR_WIDTH 15
1383 #define CTXSW_UCODE_HEADER_SIZE_IN_BYTES 256
1384
1385 #define PRI_GPCCS_ADDR_MASK(addr)       ((addr) & ((1 << PRI_GPCCS_ADDR_WIDTH) - 1))
1386 #define PRI_GPC_ADDR(addr, gpc)         (proj_gpc_base_v()+((gpc)*proj_gpc_stride_v())+(addr))
1387
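/* build the main ctxsw image header plus the per-falcon local headers (FECS and one per GPC); priv-register and ramchain segment offsets/sizes are kept in 256-byte units */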
1388 static int gr_gk20a_create_ctx_header(struct gk20a *g, u32 *header)
1389 {
1390         u32 *header_curr;
1391         u32 num_gpcs;
1392         u32 num_tpcs;
1393         u32 num_ppcs;
1394         u32 tpc_id_mask;
1395         u32 ppc_mask;
1396         u32 rc_offset, rc_size;
1397         u32 num_fecs_ramchains;
1398         u32 num_gpc_ramchains;
1399         u32 sys_priv_size;
1400         u32 sys_priv_offset;
1401         u32 gpc_priv_size;
1402         u32 gpc_priv_offset;
1403         u32 fecs_image_size;
1404         u32 gpc_image_size;
1405         u32 total_image_size;
1406         u32 lane, gpc, ppc;
1407         u32 addr, words, bytes;
1408         u32 litter_num_pes_per_gpc;
1409
1410         if (!g->gr.ctx_vars.valid)
1411                 return -1;
1412
1413         nvhost_dbg_fn("");
1414
1415         if (tegra_revision == TEGRA_REVISION_SIM) {
1416                 num_gpcs = g->gr.gpc_count;
1417         } else {
1418                 num_gpcs = gk20a_readl(g, gr_fecs_fs_r());
1419                 num_gpcs = gr_fecs_fs_num_available_gpcs_v(num_gpcs);
1420         }
1421
1422         header_curr = header;
1423
1424         header_curr[ctxsw_prog_main_image_num_gpc_v() >> 2] = num_gpcs;
1425         header_curr[ctxsw_prog_main_image_magic_value_v() >> 2] =
1426                 ctxsw_prog_main_image_magic_value_v_value_f();
1427
1428         fecs_image_size = g->gr.ctx_vars.ctxsw_regs.sys.count << 2;
1429         fecs_image_size = ((fecs_image_size + 255) & ~255);
1430
1431         sys_priv_size = fecs_image_size >> 8;
1432         sys_priv_offset = 2 + num_gpcs;
1433
1434         header_curr += (CTXSW_UCODE_HEADER_SIZE_IN_BYTES >> 2);
1435         header_curr[ctxsw_prog_local_reg_ctl_v() >> 2] =
1436                 ctxsw_prog_local_reg_ctl_offset_f(sys_priv_offset) |
1437                 ctxsw_prog_local_reg_ctl_size_f(sys_priv_size);
1438         header_curr[ctxsw_prog_local_magic_value_v() >> 2] =
1439                 ctxsw_prog_local_magic_value_v_value_f();
1440
1441         if (tegra_revision != TEGRA_REVISION_SIM) {
1442                 rc_offset = 0;
1443                 rc_size = 0;
1444
1445                 num_fecs_ramchains = gr_fecs_rc_lanes_num_chains_v(
1446                         gk20a_readl(g, gr_fecs_rc_lanes_r()));
1447
1448                 header_curr[ctxsw_prog_local_image_ctl_v() >> 2] =
1449                         ctxsw_prog_local_image_ctl_num_ramchains_f(num_fecs_ramchains);
1450
1451                 for (lane = 0; lane < num_fecs_ramchains; lane++) {
1452                         rc_offset += (rc_size >> 8);
1453
1454                         gk20a_writel(g, gr_fecs_falcon_addr_r(), lane);
1455                         words = gr_fecs_rc_lane_size_v_v(
1456                                         gk20a_readl(g, gr_fecs_rc_lane_size_r(0)));
1457                         header_curr[ctxsw_prog_local_ramchain_save_v(lane) >> 2] =
1458                                 ctxsw_prog_local_ramchain_save_words_f(words);
1459                         bytes = words << 2;
1460
1461                         if (bytes)
1462                                 header_curr[ctxsw_prog_local_ramchain_ctl_v(lane) >> 2] =
1463                                         ctxsw_prog_local_ramchain_ctl_offset_f(rc_offset);
1464                         else
1465                                 header_curr[ctxsw_prog_local_ramchain_ctl_v(lane) >> 2] =
1466                                         ctxsw_prog_local_ramchain_ctl_offset_f(0);
1467
1468                         rc_size = (bytes + 0xFF) & ~0xFF;
1469                         fecs_image_size += rc_size;
1470                 }
1471         }
1472
1473         header_curr[ctxsw_prog_local_image_size_v() >> 2] = fecs_image_size;
1474         total_image_size = fecs_image_size + 256 + 256 + num_gpcs * 256;
1475
1476         litter_num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
1477         for (gpc = 0; gpc < num_gpcs; gpc++) {
1478
1479                 header_curr += (CTXSW_UCODE_HEADER_SIZE_IN_BYTES >> 2);
1480
1481                 addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(gr_gpc0_fs_gpc_r()), gpc);
1482                 num_tpcs = gr_gpc0_fs_gpc_num_available_tpcs_v(
1483                                 gk20a_readl(g, addr));
1484
1485                 if (litter_num_pes_per_gpc > 1) {
1486                         num_ppcs = 0;
1487                         ppc_mask = 0;
1488                         for (ppc = 0; ppc < litter_num_pes_per_gpc; ppc++) {
1489                                 addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(
1490                                         gr_gpc0_gpm_pd_pes_tpc_id_mask_r(ppc)), gpc);
1491                                 tpc_id_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(
1492                                         gk20a_readl(g, addr));
1493                                 if (tpc_id_mask) {
1494                                         num_ppcs++;
1495                                         ppc_mask |= (1 << ppc);
1496                                 }
1497                         }
1498                         header_curr[ctxsw_prog_local_image_ppc_info_v() >> 2] =
1499                                 ctxsw_prog_local_image_ppc_info_ppc_mask_f(ppc_mask) |
1500                                 ctxsw_prog_local_image_ppc_info_num_ppcs_f(num_ppcs);
1501                 }
1502
1503                 gpc_priv_offset = total_image_size >> 8;
1504                 gpc_image_size = (g->gr.ctx_vars.ctxsw_regs.gpc.count +
1505                                   g->gr.ctx_vars.ctxsw_regs.tpc.count * num_tpcs) << 2;
1506                 gpc_image_size = ((gpc_image_size + 0xFF) & ~0xFF);
1507                 gpc_priv_size = gpc_image_size >> 8;
1508
1509                 header_curr[ctxsw_prog_local_reg_ctl_v() >> 2] =
1510                         ctxsw_prog_local_reg_ctl_offset_f(gpc_priv_offset) |
1511                         ctxsw_prog_local_reg_ctl_size_f(gpc_priv_size);
1512
1513                 header_curr[ctxsw_prog_local_image_num_tpcs_v() >> 2] =
1514                         num_tpcs;
1515                 header_curr[ctxsw_prog_local_magic_value_v() >> 2] =
1516                         ctxsw_prog_local_magic_value_v_value_f();
1517
1518                 if (tegra_revision != TEGRA_REVISION_SIM) {
1519                         rc_offset = 0;
1520                         rc_size = 0;
1521
1522                         addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(
1523                                 gr_gpccs_rc_lanes_r()), gpc);
1524                         num_gpc_ramchains = gr_gpccs_rc_lanes_num_chains_v(
1525                                 gk20a_readl(g, addr));
1526
1527                         header_curr[ctxsw_prog_local_image_ctl_v() >> 2] =
1528                                 ctxsw_prog_local_image_ctl_num_ramchains_f(num_gpc_ramchains);
1529
1530                         for (lane = 0; lane < num_gpc_ramchains; lane++) {
1531                                 rc_offset += rc_size >> 8;
1532
1533                                 addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(
1534                                                 gr_gpccs_falcon_addr_r()), gpc);
1535                                 gk20a_writel(g, addr, lane);
1536
1537                                 addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(
1538                                                 gr_gpccs_rc_lane_size_r(0)), gpc);
1539                                 words = gr_gpccs_rc_lane_size_v_v(
1540                                                 gk20a_readl(g, addr));
1541
1542                                 header_curr[ctxsw_prog_local_ramchain_save_v(lane) >> 2] =
1543                                         ctxsw_prog_local_ramchain_save_words_f(words);
1544                                 bytes = words << 2;
1545
1546                                 if (bytes)
1547                                         header_curr[ctxsw_prog_local_ramchain_ctl_v(lane) >> 2] =
1548                                                 ctxsw_prog_local_ramchain_ctl_offset_f(rc_offset);
1549                                 else
1550                                         header_curr[ctxsw_prog_local_ramchain_ctl_v(lane) >> 2] =
1551                                                 ctxsw_prog_local_ramchain_ctl_offset_f(0);
1552
1553                                 rc_size = (bytes + 0xFF) & ~0xFF;
1554                                 gpc_image_size += rc_size;
1555                         }
1556                 }
1557
1558                 header_curr[ctxsw_prog_local_image_size_v() >> 2] = gpc_image_size;
1559                 total_image_size += gpc_image_size;
1560         }
1561
1562         header[ctxsw_prog_main_image_size_v() >> 2] = total_image_size;
1563
1564         return 0;
1565 }
1566
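/* query the golden, zcull and pm context image sizes from FECS and build a temporary context header used for ctx override */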
1567 static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
1568 {
1569         u32 golden_ctx_image_size = 0;
1570         u32 zcull_ctx_image_size = 0;
1571         u32 pm_ctx_image_size = 0;
1572         u32 ret;
1573
1574         nvhost_dbg_fn("");
1575
1576         if (g->gr.ctx_vars.golden_image_size)
1577                 return 0;
1578
1579         /* 256 bytes hdr + 256 bytes FECS + numGpc * 256 bytes GPCCS */
1580         gr->ctx_vars.buffer_header_size = 256 + 256 + 256 * gr->gpc_count;
1581
1582         ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 0,
1583                         gr_fecs_method_push_adr_discover_image_size_f(),
1584                         &golden_ctx_image_size,
1585                         GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
1586         if (ret) {
1587                 nvhost_err(dev_from_gk20a(g),
1588                            "query golden image size failed");
1589                 return ret;
1590         }
1591
1592         ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 0,
1593                         gr_fecs_method_push_adr_discover_zcull_image_size_f(),
1594                         &zcull_ctx_image_size,
1595                         GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
1596         if (ret) {
1597                 nvhost_err(dev_from_gk20a(g),
1598                            "query zcull ctx image size failed");
1599                 return ret;
1600         }
1601
1602         ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 0,
1603                         gr_fecs_method_push_adr_discover_pm_image_size_f(),
1604                         &pm_ctx_image_size,
1605                         GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
1606         if (ret) {
1607                 nvhost_err(dev_from_gk20a(g),
1608                            "query pm ctx image size failed");
1609                 return ret;
1610         }
1611
1612         g->gr.ctx_vars.golden_image_size = golden_ctx_image_size;
1613         g->gr.ctx_vars.zcull_ctxsw_image_size = zcull_ctx_image_size;
1614
1615         /* create a temp header for ctx override */
1616         if (!gr->temp_ctx_header) {
1617                 gr->temp_ctx_header =
1618                         kzalloc(gr->ctx_vars.buffer_header_size, GFP_KERNEL);
1619                 if (!gr->temp_ctx_header)
1620                         return -ENOMEM;
1621         }
1622
1623         gr_gk20a_create_ctx_header(g, (u32 *)gr->temp_ctx_header);
1624
1625         nvhost_dbg_fn("done");
1626         return 0;
1627 }
1628
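/* allocate the global context buffers shared by all channels: bundle CB, pagepool, attribute CB (plus a VPR variant) and the golden image store */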
1629 static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
1630 {
1631         struct gr_gk20a *gr = &g->gr;
1632         struct mem_mgr *memmgr = mem_mgr_from_g(g);
1633         struct mem_handle *mem;
1634         u32 i, attr_buffer_size;
1635
1636         u32 cb_buffer_size = gr_scc_bundle_cb_size_div_256b__prod_v() *
1637                 gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
1638
1639         u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
1640                 gr_scc_pagepool_total_pages_byte_granularity_v();
1641
1642         u32 attr_cb_default_size = gr_gpc0_ppc0_cbm_cfg_size_default_v();
1643         u32 alpha_cb_default_size = gr_gpc0_ppc0_cbm_cfg2_size_default_v();
1644
1645         u32 attr_cb_size =
1646                 attr_cb_default_size + (attr_cb_default_size >> 1);
1647         u32 alpha_cb_size =
1648                 alpha_cb_default_size + (alpha_cb_default_size >> 1);
1649
1650         u32 num_tpcs_per_pes = proj_scal_litter_num_tpcs_per_pes_v();
1651         u32 attr_max_size_per_tpc =
1652                 gr_gpc0_ppc0_cbm_cfg_size_v(~0) / num_tpcs_per_pes;
1653         u32 alpha_max_size_per_tpc =
1654                 gr_gpc0_ppc0_cbm_cfg2_size_v(~0) / num_tpcs_per_pes;
1655
1656
1657         nvhost_dbg_fn("");
1658
1659         attr_cb_size =
1660                 (attr_cb_size > attr_max_size_per_tpc) ?
1661                         attr_max_size_per_tpc : attr_cb_size;
1662         attr_cb_default_size =
1663                 (attr_cb_default_size > attr_cb_size) ?
1664                         attr_cb_size : attr_cb_default_size;
1665         alpha_cb_size =
1666                 (alpha_cb_size > alpha_max_size_per_tpc) ?
1667                         alpha_max_size_per_tpc : alpha_cb_size;
1668         alpha_cb_default_size =
1669                 (alpha_cb_default_size > alpha_cb_size) ?
1670                         alpha_cb_size : alpha_cb_default_size;
1671
1672         attr_buffer_size =
1673                 (gr_gpc0_ppc0_cbm_cfg_size_granularity_v() * attr_cb_size +
1674                  gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() * alpha_cb_size) *
1675                  gr->gpc_count;
1676
1677         nvhost_dbg_info("cb_buffer_size : %d", cb_buffer_size);
1678
1679         mem = mem_op().alloc(memmgr, cb_buffer_size,
1680                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1681                           DEFAULT_NVMAP_ALLOC_FLAGS,
1682                           NVMAP_HEAP_CARVEOUT_GENERIC);
1683         if (IS_ERR_OR_NULL(mem))
1684                 goto clean_up;
1685
1686         gr->global_ctx_buffer[CIRCULAR].ref = mem;
1687         gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size;
1688
1689         nvhost_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
1690
1691         mem = mem_op().alloc(memmgr, pagepool_buffer_size,
1692                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1693                           DEFAULT_NVMAP_ALLOC_FLAGS,
1694                           NVMAP_HEAP_CARVEOUT_GENERIC);
1695         if (IS_ERR_OR_NULL(mem))
1696                 goto clean_up;
1697
1698         gr->global_ctx_buffer[PAGEPOOL].ref = mem;
1699         gr->global_ctx_buffer[PAGEPOOL].size = pagepool_buffer_size;
1700
1701         nvhost_dbg_info("attr_buffer_size : %d", attr_buffer_size);
1702
1703         mem = mem_op().alloc(memmgr, attr_buffer_size,
1704                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1705                           DEFAULT_NVMAP_ALLOC_FLAGS,
1706                           NVMAP_HEAP_CARVEOUT_GENERIC);
1707         if (IS_ERR_OR_NULL(mem))
1708                 goto clean_up;
1709
1710         gr->global_ctx_buffer[ATTRIBUTE].ref = mem;
1711         gr->global_ctx_buffer[ATTRIBUTE].size = attr_buffer_size;
1712
1713         mem = mem_op().alloc(memmgr, attr_buffer_size,
1714                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1715                           DEFAULT_NVMAP_ALLOC_FLAGS,
1716                           NVMAP_HEAP_CARVEOUT_GENERIC); /* TBD: use NVMAP_HEAP_CARVEOUT_VPR */
1717         if (IS_ERR_OR_NULL(mem))
1718                 goto clean_up;
1719
1720         gr->global_ctx_buffer[ATTRIBUTE_VPR].ref = mem;
1721         gr->global_ctx_buffer[ATTRIBUTE_VPR].size = attr_buffer_size;
1722
1723         nvhost_dbg_info("golden_image_size : %d",
1724                    gr->ctx_vars.golden_image_size);
1725
1726         mem = mem_op().alloc(memmgr, gr->ctx_vars.golden_image_size,
1727                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1728                           DEFAULT_NVMAP_ALLOC_FLAGS,
1729                           NVMAP_HEAP_CARVEOUT_GENERIC);
1730         if (IS_ERR_OR_NULL(mem))
1731                 goto clean_up;
1732
1733         gr->global_ctx_buffer[GOLDEN_CTX].ref = mem;
1734         gr->global_ctx_buffer[GOLDEN_CTX].size =
1735                 gr->ctx_vars.golden_image_size;
1736
1737         nvhost_dbg_fn("done");
1738         return 0;
1739
1740  clean_up:
1741         nvhost_dbg(dbg_fn | dbg_err, "fail");
1742         for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
1743                 if (gr->global_ctx_buffer[i].ref) {
1744                         mem_op().put(memmgr,
1745                                 gr->global_ctx_buffer[i].ref);
1746                         memset(&gr->global_ctx_buffer[i],
1747                                 0, sizeof(struct mem_desc));
1748                 }
1749         }
1750         return -ENOMEM;
1751 }
1752
1753 static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g)
1754 {
1755         struct gr_gk20a *gr = &g->gr;
1756         struct mem_mgr *memmgr = mem_mgr_from_g(g);
1757         u32 i;
1758
1759         for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
1760                 mem_op().put(memmgr, gr->global_ctx_buffer[i].ref);
1761                 memset(&gr->global_ctx_buffer[i], 0, sizeof(struct mem_desc));
1762         }
1763 }
1764
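/* map the global context buffers into the channel's GPU address space, selecting the VPR attribute buffer for VPR channels */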
1765 static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
1766                                         struct channel_gk20a *c)
1767 {
1768         struct vm_gk20a *ch_vm = c->vm;
1769         struct mem_mgr *memmgr = mem_mgr_from_g(g);
1770         struct mem_handle *handle_ref;
1771         u32 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
1772         struct gr_gk20a *gr = &g->gr;
1773         u64 gpu_va;
1774         u32 i;
1775         nvhost_dbg_fn("");
1776
1777         gpu_va = ch_vm->map(ch_vm, memmgr,
1778                             gr->global_ctx_buffer[CIRCULAR].ref,
1779                             0, 0, 0 /*offset_align, flags, kind*/);
1780         if (!gpu_va)
1781                 goto clean_up;
1782         g_bfr_va[CIRCULAR_VA] = gpu_va;
1783
1784         if (!c->vpr)
1785                 handle_ref = gr->global_ctx_buffer[ATTRIBUTE].ref;
1786         else
1787                 handle_ref = gr->global_ctx_buffer[ATTRIBUTE_VPR].ref;
1788
1789         gpu_va = ch_vm->map(ch_vm, memmgr, handle_ref,
1790                             0, 0, 0 /*offset_align, flags, kind*/);
1791         if (!gpu_va)
1792                 goto clean_up;
1793         g_bfr_va[ATTRIBUTE_VA] = gpu_va;
1794
1795         gpu_va = ch_vm->map(ch_vm, memmgr,
1796                             gr->global_ctx_buffer[PAGEPOOL].ref,
1797                             0, 0, 0/*offset_align, flags, kind*/);
1798         if (!gpu_va)
1799                 goto clean_up;
1800         g_bfr_va[PAGEPOOL_VA] = gpu_va;
1801
1802         gpu_va = ch_vm->map(ch_vm, memmgr,
1803                             gr->global_ctx_buffer[GOLDEN_CTX].ref,
1804                             0, 0, 0 /*offset_align, flags, kind*/);
1805         if (!gpu_va)
1806                 goto clean_up;
1807         g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
1808
1809         c->ch_ctx.global_ctx_buffer_mapped = true;
1810         return 0;
1811
1812  clean_up:
1813         for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
1814                 if (g_bfr_va[i]) {
1815                         ch_vm->unmap(ch_vm, g_bfr_va[i]);
1816                         g_bfr_va[i] = 0;
1817                 }
1818         }
1819         return -ENOMEM;
1820 }
1821
1822 static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
1823 {
1824         struct vm_gk20a *ch_vm = c->vm;
1825         u32 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
1826         u32 i;
1827
1828         nvhost_dbg_fn("");
1829
1830         for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
1831                 if (g_bfr_va[i]) {
1832                         ch_vm->unmap(ch_vm, g_bfr_va[i]);
1833                         g_bfr_va[i] = 0;
1834                 }
1835         }
1836         c->ch_ctx.global_ctx_buffer_mapped = false;
1837 }
1838
1839 static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
1840                                 struct channel_gk20a *c)
1841 {
1842         struct gr_gk20a *gr = &g->gr;
1843         struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx;
1844         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
1845         struct vm_gk20a *ch_vm = c->vm;
1846
1847         nvhost_dbg_fn("");
1848
1849         if (gr->ctx_vars.buffer_size == 0)
1850                 return 0;
1851
1852         /* alloc channel gr ctx buffer */
1853         gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
1854         gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
1855
1856         gr_ctx->mem.ref = mem_op().alloc(memmgr,
1857                                 gr->ctx_vars.buffer_total_size,
1858                                 DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1859                                 DEFAULT_NVMAP_ALLOC_FLAGS,
1860                                 NVMAP_HEAP_CARVEOUT_GENERIC);
1861
1862         if (IS_ERR(gr_ctx->mem.ref))
1863                 return -ENOMEM;
1864
1865         gr_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
1866                 gr_ctx->mem.ref, 0, 0, 0 /*offset_align, flags, kind*/);
1867         if (!gr_ctx->gpu_va) {
1868                 mem_op().put(memmgr, gr_ctx->mem.ref);
1869                 return -ENOMEM;
1870         }
1871
1872         return 0;
1873 }
1874
1875 static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
1876 {
1877         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1878         struct mem_mgr *ch_nvmap = gk20a_channel_mem_mgr(c);
1879         struct vm_gk20a *ch_vm = c->vm;
1880
1881         nvhost_dbg_fn("");
1882
1883         ch_vm->unmap(ch_vm, ch_ctx->gr_ctx.gpu_va);
1884         mem_op().put(ch_nvmap, ch_ctx->gr_ctx.mem.ref);
1885 }
1886
1887 static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
1888                                 struct channel_gk20a *c)
1889 {
1890         struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
1891         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
1892         struct vm_gk20a *ch_vm = c->vm;
1893
1894         nvhost_dbg_fn("");
1895
1896         patch_ctx->mem.ref = mem_op().alloc(memmgr, 128 * sizeof(u32),
1897                                 DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1898                                 DEFAULT_NVMAP_ALLOC_FLAGS,
1899                                 NVMAP_HEAP_CARVEOUT_GENERIC);
1900         if (IS_ERR(patch_ctx->mem.ref))
1901                 return -ENOMEM;
1902
1903         patch_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
1904                                 patch_ctx->mem.ref,
1905                                 0, 0, 0 /*offset_align, flags, kind*/);
1906         if (!patch_ctx->gpu_va)
1907                 goto clean_up;
1908
1909         nvhost_dbg_fn("done");
1910         return 0;
1911
1912  clean_up:
1913         nvhost_dbg(dbg_fn | dbg_err, "fail");
1914         if (patch_ctx->mem.ref) {
1915                 mem_op().put(memmgr, patch_ctx->mem.ref);
1916                 patch_ctx->mem.ref = 0;
1917         }
1918
1919         return -ENOMEM;
1920 }
1921
1922 static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c)
1923 {
1924         struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
1925         struct vm_gk20a *ch_vm = c->vm;
1926
1927         nvhost_dbg_fn("");
1928
1929         ch_vm->unmap(ch_vm, patch_ctx->gpu_va);
1930         patch_ctx->gpu_va = 0;
1931         patch_ctx->data_count = 0;
1932 }
1933
1934 static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
1935 {
1936         struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
1937         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
1938
1939         nvhost_dbg_fn("");
1940
1941         gr_gk20a_unmap_channel_patch_ctx(c);
1942
1943         if (patch_ctx->mem.ref) {
1944                 mem_op().put(memmgr, patch_ctx->mem.ref);
1945                 patch_ctx->mem.ref = 0;
1946         }
1947 }
1948
1949 void gk20a_free_channel_ctx(struct channel_gk20a *c)
1950 {
1951         gr_gk20a_unmap_global_ctx_buffers(c);
1952         gr_gk20a_free_channel_patch_ctx(c);
1953         gr_gk20a_free_channel_gr_ctx(c);
1954
1955         /* zcull_ctx, pm_ctx */
1956
1957         memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
1958
1959         c->num_objects = 0;
1960         c->first_init = false;
1961 }
1962
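/* allocate per-channel context for an object class: gr and patch ctx buffers, global buffer mappings, and init/load of the golden context image on first use */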
1963 int gk20a_alloc_obj_ctx(struct channel_gk20a  *c,
1964                         struct nvhost_alloc_obj_ctx_args *args)
1965 {
1966         struct gk20a *g = c->g;
1967         struct gr_gk20a *gr = &g->gr;
1968         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1969         bool change_to_compute_mode = false;
1970         int err = 0;
1971
1972         nvhost_dbg_fn("");
1973
1974         /* an address space needs to have been bound at this point.*/
1975         if (!gk20a_channel_as_bound(c)) {
1976                 nvhost_err(dev_from_gk20a(g),
1977                            "not bound to address space at time"
1978                            " of grctx allocation");
1979                 return -EINVAL;
1980         }
1981
1982         switch (args->class_num) {
1983         case KEPLER_COMPUTE_A:
1984                 /* tbd: NV2080_CTRL_GPU_COMPUTE_MODE_RULES_EXCLUSIVE_COMPUTE */
1985                 /* tbd: PDB_PROP_GRAPHICS_DISTINCT_3D_AND_COMPUTE_STATE_DEF  */
1986                 change_to_compute_mode = true;
1987                 break;
1988         case KEPLER_C:
1989         case FERMI_TWOD_A:
1990         case KEPLER_DMA_COPY_A:
1991                 break;
1992
1993         default:
1994                 nvhost_err(dev_from_gk20a(g),
1995                            "invalid obj class 0x%x", args->class_num);
1996                 err = -EINVAL;
1997                 goto out;
1998         }
1999
2000         /* allocate gr ctx buffer */
2001         if (ch_ctx->gr_ctx.mem.ref == NULL) {
2002                 err = gr_gk20a_alloc_channel_gr_ctx(g, c);
2003                 if (err) {
2004                         nvhost_err(dev_from_gk20a(g),
2005                                 "fail to allocate gr ctx buffer");
2006                         goto out;
2007                 }
2008         } else {
2009                 /* TBD: needs to be more subtle about which is being allocated
2010                  * as some are allowed to be allocated along the same channel */
2011                 nvhost_err(dev_from_gk20a(g),
2012                         "too many classes alloc'd on same channel");
2013                 err = -EINVAL;
2014                 goto out;
2015         }
2016
2017         /* commit gr ctx buffer */
2018         err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
2019         if (err) {
2020                 nvhost_err(dev_from_gk20a(g),
2021                         "fail to commit gr ctx buffer");
2022                 goto out;
2023         }
2024
2025         /* set misc. might be possible to move around later */
2026         ch_ctx->pm_ctx.ctx_sw_mode =
2027                 ctxsw_prog_main_image_pm_mode_no_ctxsw_v();
2028
2029         /* allocate patch buffer */
2030         if (ch_ctx->patch_ctx.mem.ref == NULL) {
2031                 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
2032                 if (err) {
2033                         nvhost_err(dev_from_gk20a(g),
2034                                 "fail to allocate patch buffer");
2035                         goto out;
2036                 }
2037         }
2038
2039         /* map global buffer to channel gpu_va and commit */
2040         if (!ch_ctx->global_ctx_buffer_mapped) {
2041                 err = gr_gk20a_map_global_ctx_buffers(g, c);
2042                 if (err) {
2043                         nvhost_err(dev_from_gk20a(g),
2044                                 "fail to map global ctx buffer");
2045                         goto out;
2046                 }
2047                 gr_gk20a_elpg_protected_call(g,
2048                         gr_gk20a_commit_global_ctx_buffers(g, c, 1));
2049         }
2050
2051         /* init golden image, ELPG enabled after this is done */
2052         if (!gr->ctx_vars.golden_image_initialized) {
2053                 err = gr_gk20a_init_golden_ctx_image(g, c);
2054                 if (err) {
2055                         nvhost_err(dev_from_gk20a(g),
2056                                 "fail to init golden ctx image");
2057                         goto out;
2058                 }
2059         }
2060
2061         /* load golden image */
2062         if (!c->first_init) {
2063                 err = gr_gk20a_elpg_protected_call(g,
2064                         gr_gk20a_load_golden_ctx_image(g, c));
2065                 if (err) {
2066                         nvhost_err(dev_from_gk20a(g),
2067                                 "fail to load golden ctx image");
2068                         goto out;
2069                 }
2070                 c->first_init = true;
2071         }
2072
2073         c->num_objects++;
2074
2075         nvhost_dbg_fn("done");
2076         return 0;
2077 out:
2078         /* 1. gr_ctx, patch_ctx and global ctx buffer mapping
2079            can be reused so no need to release them.
2080            2. golden image init and load is a one time thing so if
2081            they pass, no need to undo. */
2082         nvhost_dbg(dbg_fn | dbg_err, "fail");
2083         return err;
2084 }
2085
2086 int gk20a_free_obj_ctx(struct channel_gk20a  *c,
2087                        struct nvhost_free_obj_ctx_args *args)
2088 {
2089         nvhost_dbg_fn("");
2090
2091         if (c->num_objects == 0)
2092                 return 0;
2093
2094         c->num_objects--;
2095
2096         if (c->num_objects == 0) {
2097                 c->first_init = false;
2098                 gr_gk20a_unmap_channel_patch_ctx(c);
2099         }
2100
2101         return 0;
2102 }
2103
2104 static void gk20a_remove_gr_support(struct gk20a *g, struct gr_gk20a *gr)
2105 {
2106         struct mem_mgr *memmgr = mem_mgr_from_g(g);
2107
2108         nvhost_dbg_fn("");
2109
2110         gr_gk20a_free_global_ctx_buffers(g);
2111
2112         mem_op().unpin(memmgr, gr->mmu_wr_mem.mem.ref);
2113         mem_op().unpin(memmgr, gr->mmu_rd_mem.mem.ref);
2114         mem_op().unpin(memmgr, gr->compbit_store.mem.ref);
2115         mem_op().put(memmgr, gr->mmu_wr_mem.mem.ref);
2116         mem_op().put(memmgr, gr->mmu_rd_mem.mem.ref);
2117         mem_op().put(memmgr, gr->compbit_store.mem.ref);
2118         kfree(gr->gpc_tpc_count);
2119         kfree(gr->gpc_ppc_count);
2120         kfree(gr->pes_tpc_count[0]);
2121         kfree(gr->pes_tpc_count[1]);
2122         kfree(gr->pes_tpc_mask[0]);
2123         kfree(gr->pes_tpc_mask[1]);
2124         kfree(gr->gpc_skip_mask);
2125         kfree(gr->temp_ctx_header);
2126         kfree(gr->ctx_vars.ucode.fecs.inst.l);
2127         kfree(gr->ctx_vars.ucode.fecs.data.l);
2128         kfree(gr->ctx_vars.ucode.gpccs.inst.l);
2129         kfree(gr->ctx_vars.ucode.gpccs.data.l);
2130         kfree(gr->ctx_vars.sw_bundle_init.l);
2131         kfree(gr->ctx_vars.sw_method_init.l);
2132         kfree(gr->ctx_vars.sw_ctx_load.l);
2133         kfree(gr->ctx_vars.sw_non_ctx_load.l);
2134         kfree(gr->ctx_vars.ctxsw_regs.sys.l);
2135         kfree(gr->ctx_vars.ctxsw_regs.gpc.l);
2136         kfree(gr->ctx_vars.ctxsw_regs.tpc.l);
2137         kfree(gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
2138         kfree(gr->ctx_vars.ctxsw_regs.ppc.l);
2139         kfree(gr->ctx_vars.ctxsw_regs.pm_sys.l);
2140         kfree(gr->ctx_vars.ctxsw_regs.pm_gpc.l);
2141         kfree(gr->ctx_vars.ctxsw_regs.pm_tpc.l);
2142
2143         memset(&gr->mmu_wr_mem, 0, sizeof(struct mem_desc));
2144         memset(&gr->mmu_rd_mem, 0, sizeof(struct mem_desc));
2145         memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
2146         gr->gpc_tpc_count = NULL;
2147         gr->gpc_ppc_count = NULL;
2148         gr->pes_tpc_count[0] = NULL;
2149         gr->pes_tpc_count[1] = NULL;
2150         gr->pes_tpc_mask[0] = NULL;
2151         gr->pes_tpc_mask[1] = NULL;
2152         gr->gpc_skip_mask = NULL;
2153         gr->temp_ctx_header = NULL;
2154
2155         nvhost_allocator_destroy(&gr->comp_tags);
2156
2157         /*tbd*/
2158 }
2159
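/* read floorsweeping and topology registers to fill in GPC/TPC/ZCB/PPC counts, per-GPC skip masks and the circular-buffer defaults */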
2160 static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
2161 {
2162         u32 gpc_index, pes_index;
2163         u32 pes_tpc_mask;
2164         u32 pes_tpc_count;
2165         u32 pes_heavy_index;
2166         u32 gpc_new_skip_mask;
2167         u32 tmp;
2168
2169         tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
2170         gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
2171
2172         tmp = gk20a_readl(g, top_num_gpcs_r());
2173         gr->max_gpc_count = top_num_gpcs_value_v(tmp);
2174
2175         tmp = gk20a_readl(g, top_num_fbps_r());
2176         gr->max_fbps_count = top_num_fbps_value_v(tmp);
2177
2178         tmp = gk20a_readl(g, top_tpc_per_gpc_r());
2179         gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
2180
2181         gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
2182
2183         tmp = gk20a_readl(g, top_num_fbps_r());
2184         gr->sys_count = top_num_fbps_value_v(tmp);
2185
2186         tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
2187         gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
2188
2189         gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
2190         gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v();
2191
2192         if (!gr->gpc_count) {
2193                 nvhost_err(dev_from_gk20a(g), "gpc_count==0!");
2194                 goto clean_up;
2195         }
2196
2197         gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2198         gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2199         gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2200         gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2201         gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2202         gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2203         gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2204         gr->gpc_skip_mask =
2205                 kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
2206                         GFP_KERNEL);
2207
2208         if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count ||
2209             !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] ||
2210             !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask)
2211                 goto clean_up;
2212
2213         gr->ppc_count = 0;
2214         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2215                 tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r());
2216
2217                 gr->gpc_tpc_count[gpc_index] =
2218                         gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
2219                 gr->tpc_count += gr->gpc_tpc_count[gpc_index];
2220
2221                 gr->gpc_zcb_count[gpc_index] =
2222                         gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
2223                 gr->zcb_count += gr->gpc_zcb_count[gpc_index];
2224
2225                 gr->gpc_ppc_count[gpc_index] = gr->pe_count_per_gpc;
2226                 gr->ppc_count += gr->gpc_ppc_count[gpc_index];
2227                 for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
2228
2229                         tmp = gk20a_readl(g,
2230                                 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
2231                                 gpc_index * proj_gpc_stride_v());
2232
2233                         pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
2234                         pes_tpc_count = count_bits(pes_tpc_mask);
2235
2236                         gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
2237                         gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
2238                 }
2239
2240                 gpc_new_skip_mask = 0;
2241                 if (gr->pes_tpc_count[0][gpc_index] +
2242                     gr->pes_tpc_count[1][gpc_index] == 5) {
2243                         pes_heavy_index =
2244                                 gr->pes_tpc_count[0][gpc_index] >
2245                                 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
2246
2247                         gpc_new_skip_mask =
2248                                 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
2249                                    (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
2250                                    (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
2251
2252                 } else if ((gr->pes_tpc_count[0][gpc_index] +
2253                             gr->pes_tpc_count[1][gpc_index] == 4) &&
2254                            (gr->pes_tpc_count[0][gpc_index] !=
2255                             gr->pes_tpc_count[1][gpc_index])) {
2256                                 pes_heavy_index =
2257                                     gr->pes_tpc_count[0][gpc_index] >
2258                                     gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
2259
2260                         gpc_new_skip_mask =
2261                                 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
2262                                    (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
2263                                    (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
2264                 }
2265                 gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
2266         }
2267
2268         nvhost_dbg_info("fbps: %d", gr->num_fbps);
2269         nvhost_dbg_info("max_gpc_count: %d", gr->max_gpc_count);
2270         nvhost_dbg_info("max_fbps_count: %d", gr->max_fbps_count);
2271         nvhost_dbg_info("max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count);
2272         nvhost_dbg_info("max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count);
2273         nvhost_dbg_info("max_tpc_count: %d", gr->max_tpc_count);
2274         nvhost_dbg_info("sys_count: %d", gr->sys_count);
2275         nvhost_dbg_info("gpc_count: %d", gr->gpc_count);
2276         nvhost_dbg_info("pe_count_per_gpc: %d", gr->pe_count_per_gpc);
2277         nvhost_dbg_info("tpc_count: %d", gr->tpc_count);
2278         nvhost_dbg_info("ppc_count: %d", gr->ppc_count);
2279
2280         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2281                 nvhost_dbg_info("gpc_tpc_count[%d] : %d",
2282                            gpc_index, gr->gpc_tpc_count[gpc_index]);
2283         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2284                 nvhost_dbg_info("gpc_zcb_count[%d] : %d",
2285                            gpc_index, gr->gpc_zcb_count[gpc_index]);
2286         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2287                 nvhost_dbg_info("gpc_ppc_count[%d] : %d",
2288                            gpc_index, gr->gpc_ppc_count[gpc_index]);
2289         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2290                 nvhost_dbg_info("gpc_skip_mask[%d] : %d",
2291                            gpc_index, gr->gpc_skip_mask[gpc_index]);
2292         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2293                 for (pes_index = 0;
2294                      pes_index < gr->pe_count_per_gpc;
2295                      pes_index++)
2296                         nvhost_dbg_info("pes_tpc_count[%d][%d] : %d",
2297                                    pes_index, gpc_index,
2298                                    gr->pes_tpc_count[pes_index][gpc_index]);
2299
2300         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2301                 for (pes_index = 0;
2302                      pes_index < gr->pe_count_per_gpc;
2303                      pes_index++)
2304                         nvhost_dbg_info("pes_tpc_mask[%d][%d] : %d",
2305                                    pes_index, gpc_index,
2306                                    gr->pes_tpc_mask[pes_index][gpc_index]);
2307
2308         gr->bundle_cb_default_size = gr_scc_bundle_cb_size_div_256b__prod_v();
2309         gr->min_gpm_fifo_depth = gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
2310         gr->bundle_cb_token_limit = gr_pd_ab_dist_cfg2_token_limit_init_v();
2311         gr->attrib_cb_default_size = gr_gpc0_ppc0_cbm_cfg_size_default_v();
2312         /* gk20a has a fixed beta CB RAM, don't alloc more */
2313         gr->attrib_cb_size = gr->attrib_cb_default_size;
2314         gr->alpha_cb_default_size = gr_gpc0_ppc0_cbm_cfg2_size_default_v();
2315         gr->alpha_cb_size = gr->alpha_cb_default_size + (gr->alpha_cb_default_size >> 1);
2316         gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v();
2317
2318         nvhost_dbg_info("bundle_cb_default_size: %d",
2319                    gr->bundle_cb_default_size);
2320         nvhost_dbg_info("min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
2321         nvhost_dbg_info("bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
2322         nvhost_dbg_info("attrib_cb_default_size: %d",
2323                    gr->attrib_cb_default_size);
2324         nvhost_dbg_info("attrib_cb_size: %d", gr->attrib_cb_size);
2325         nvhost_dbg_info("alpha_cb_default_size: %d", gr->alpha_cb_default_size);
2326         nvhost_dbg_info("alpha_cb_size: %d", gr->alpha_cb_size);
2327         nvhost_dbg_info("timeslice_mode: %d", gr->timeslice_mode);
2328
2329         return 0;
2330
2331 clean_up:
2332         return -ENOMEM;
2333 }
2334
2335 static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
2336 {
2337         struct mem_mgr *memmgr = mem_mgr_from_g(g);
2338         void *mmu_ptr;
2339
2340         gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000;
2341
2342         gr->mmu_wr_mem.mem.ref = mem_op().alloc(memmgr, gr->mmu_wr_mem_size,
2343                                              DEFAULT_NVMAP_ALLOC_ALIGNMENT,
2344                                              DEFAULT_NVMAP_ALLOC_FLAGS,
2345                                              NVMAP_HEAP_CARVEOUT_GENERIC);
2346         if (IS_ERR_OR_NULL(gr->mmu_wr_mem.mem.ref))
2347                 goto clean_up;
2348         gr->mmu_wr_mem.mem.size = gr->mmu_wr_mem_size;
2349
2350         gr->mmu_rd_mem.mem.ref = mem_op().alloc(memmgr, gr->mmu_rd_mem_size,
2351                                              DEFAULT_NVMAP_ALLOC_ALIGNMENT,
2352                                              DEFAULT_NVMAP_ALLOC_FLAGS,
2353                                              NVMAP_HEAP_CARVEOUT_GENERIC);
2354         if (IS_ERR_OR_NULL(gr->mmu_rd_mem.mem.ref))
2355                 goto clean_up;
2356         gr->mmu_rd_mem.mem.size = gr->mmu_rd_mem_size;
2357
2358         mmu_ptr = mem_op().mmap(gr->mmu_wr_mem.mem.ref);
2359         if (!mmu_ptr)
2360                 goto clean_up;
2361         memset(mmu_ptr, 0, gr->mmu_wr_mem.mem.size);
2362         mem_op().munmap(gr->mmu_wr_mem.mem.ref, mmu_ptr);
2363
2364         mmu_ptr = mem_op().mmap(gr->mmu_rd_mem.mem.ref);
2365         if (!mmu_ptr)
2366                 goto clean_up;
2367         memset(mmu_ptr, 0, gr->mmu_rd_mem.mem.size);
2368         mem_op().munmap(gr->mmu_rd_mem.mem.ref, mmu_ptr);
2369
2370         gr->mmu_wr_mem.cpu_pa = mem_op().pin(memmgr, gr->mmu_wr_mem.mem.ref);
2371         if (gr->mmu_wr_mem.cpu_pa == -EINVAL || gr->mmu_wr_mem.cpu_pa == -EINTR)
2372                 goto clean_up;
2373
2374         gr->mmu_rd_mem.cpu_pa = mem_op().pin(memmgr, gr->mmu_rd_mem.mem.ref);
2375         if (gr->mmu_rd_mem.cpu_pa == -EINVAL || gr->mmu_rd_mem.cpu_pa == -EINTR)
2376                 goto clean_up;
2377
2378         return 0;
2379
2380 clean_up:
2381         return -ENOMEM;
2382 }
2383
2384 static u32 prime_set[18] = {
2385         2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
2386
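/* build the screen-tile to GPC map: pick a row offset (a small prime, with fixed overrides for certain TPC counts) and distribute tile slots across GPCs in proportion to their TPC counts using an error-accumulation scheme */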
2387 static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
2388 {
2389         s32 comm_denom;
2390         s32 mul_factor;
2391         s32 *init_frac = NULL;
2392         s32 *init_err = NULL;
2393         s32 *run_err = NULL;
2394         s32 *sorted_num_tpcs = NULL;
2395         s32 *sorted_to_unsorted_gpc_map = NULL;
2396         u32 gpc_index;
2397         u32 gpc_mark = 0;
2398         u32 num_tpc;
2399         u32 max_tpc_count = 0;
2400         u32 swap;
2401         u32 tile_count;
2402         u32 index;
2403         bool delete_map = false;
2404         bool gpc_sorted;
2405         int ret = 0;
2406
2407         init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2408         init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2409         run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2410         sorted_num_tpcs =
2411                 kzalloc(proj_scal_max_gpcs_v() *
2412                         proj_scal_max_tpc_per_gpc_v() * sizeof(s32),
2413                         GFP_KERNEL);
2414         sorted_to_unsorted_gpc_map =
2415                 kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2416
2417         if (!(init_frac && init_err && run_err && sorted_num_tpcs &&
2418               sorted_to_unsorted_gpc_map)) {
2419                 ret = -ENOMEM;
2420                 goto clean_up;
2421         }
2422
2423         gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;
2424
2425         if (gr->tpc_count == 3)
2426                 gr->map_row_offset = 2;
2427         else if (gr->tpc_count < 3)
2428                 gr->map_row_offset = 1;
2429         else {
2430                 gr->map_row_offset = 3;
2431
2432                 for (index = 1; index < 18; index++) {
2433                         u32 prime = prime_set[index];
2434                         if ((gr->tpc_count % prime) != 0) {
2435                                 gr->map_row_offset = prime;
2436                                 break;
2437                         }
2438                 }
2439         }
2440
2441         switch (gr->tpc_count) {
2442         case 15:
2443                 gr->map_row_offset = 6;
2444                 break;
2445         case 14:
2446                 gr->map_row_offset = 5;
2447                 break;
2448         case 13:
2449                 gr->map_row_offset = 2;
2450                 break;
2451         case 11:
2452                 gr->map_row_offset = 7;
2453                 break;
2454         case 10:
2455                 gr->map_row_offset = 6;
2456                 break;
2457         case 7:
2458         case 5:
2459                 gr->map_row_offset = 1;
2460                 break;
2461         default:
2462                 break;
2463         }
2464
2465         if (gr->map_tiles) {
2466                 if (gr->map_tile_count != gr->tpc_count)
2467                         delete_map = true;
2468
2469                 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
2470                         if ((u32)gr->map_tiles[tile_count] >= gr->tpc_count)
2471                                 delete_map = true;
2472                 }
2473
2474                 if (delete_map) {
2475                         kfree(gr->map_tiles);
2476                         gr->map_tiles = NULL;
2477                         gr->map_tile_count = 0;
2478                 }
2479         }
2480
2481         if (gr->map_tiles == NULL) {
2482                 gr->map_tile_count = proj_scal_max_gpcs_v();
2483
2484                 gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL);
2485                 if (gr->map_tiles == NULL) {
2486                         ret = -ENOMEM;
2487                         goto clean_up;
2488                 }
2489
2490                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2491                         sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
2492                         sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
2493                 }
2494
2495                 gpc_sorted = false;
2496                 while (!gpc_sorted) {
2497                         gpc_sorted = true;
2498                         for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) {
2499                                 if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) {
2500                                         gpc_sorted = false;
2501                                         swap = sorted_num_tpcs[gpc_index];
2502                                         sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1];
2503                                         sorted_num_tpcs[gpc_index + 1] = swap;
2504                                         swap = sorted_to_unsorted_gpc_map[gpc_index];
2505                                         sorted_to_unsorted_gpc_map[gpc_index] =
2506                                                 sorted_to_unsorted_gpc_map[gpc_index + 1];
2507                                         sorted_to_unsorted_gpc_map[gpc_index + 1] = swap;
2508                                 }
2509                         }
2510                 }
2511
2512                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2513                         if (gr->gpc_tpc_count[gpc_index] > max_tpc_count)
2514                                 max_tpc_count = gr->gpc_tpc_count[gpc_index];
2515
2516                 mul_factor = gr->gpc_count * max_tpc_count;
2517                 if (mul_factor & 0x1)
2518                         mul_factor = 2;
2519                 else
2520                         mul_factor = 1;
2521
2522                 comm_denom = gr->gpc_count * max_tpc_count * mul_factor;
2523
2524                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2525                         num_tpc = sorted_num_tpcs[gpc_index];
2526
2527                         init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;
2528
2529                         if (num_tpc != 0)
2530                                 init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
2531                         else
2532                                 init_err[gpc_index] = 0;
2533
2534                         run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
2535                 }
2536
2537                 while (gpc_mark < gr->tpc_count) {
2538                         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2539                                 if ((run_err[gpc_index] * 2) >= comm_denom) {
2540                                         gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
2541                                         run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
2542                                 } else
2543                                         run_err[gpc_index] += init_frac[gpc_index];
2544                         }
2545                 }
2546         }
2547
2548 clean_up:
2549         kfree(init_frac);
2550         kfree(init_err);
2551         kfree(run_err);
2552         kfree(sorted_num_tpcs);
2553         kfree(sorted_to_unsorted_gpc_map);
2554
2555         if (ret)
2556                 nvhost_dbg(dbg_fn | dbg_err, "fail");
2557         else
2558                 nvhost_dbg_fn("done");
2559
2560         return ret;
2561 }
2562
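/* size the compression-bit (CBC) backing store to cover max_comptag_mem (one comptag line per 128KB), allocate and pin it, and initialize the comptag allocator */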
2563 static int gr_gk20a_init_comptag(struct gk20a *g, struct gr_gk20a *gr)
2564 {
2565         struct mem_mgr *memmgr = mem_mgr_from_g(g);
2566
2567         /* max memory size (MB) to cover */
2568         u32 max_size = gr->max_comptag_mem;
2569         /* one tag line covers 128KB */
2570         u32 max_comptag_lines = max_size << 3;
2571
2572         u32 hw_max_comptag_lines =
2573                 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();
2574
2575         u32 cbc_param =
2576                 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
2577         u32 comptags_per_cacheline =
2578                 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
2579         u32 slices_per_fbp =
2580                 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(cbc_param);
2581         u32 cacheline_size =
2582                 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
2583
2584         u32 compbit_backing_size;
2585         int ret = 0;
2586
2587         nvhost_dbg_fn("");
2588
2589         if (max_comptag_lines == 0) {
2590                 gr->compbit_store.mem.size = 0;
2591                 return 0;
2592         }
2593
2594         if (max_comptag_lines > hw_max_comptag_lines)
2595                 max_comptag_lines = hw_max_comptag_lines;
2596
2597         /* no hybrid fb */
2598         compbit_backing_size =
2599                 DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
2600                 cacheline_size * slices_per_fbp * gr->num_fbps;
2601
2602         /* aligned to 2KB * num_fbps */
2603         compbit_backing_size +=
2604                 gr->num_fbps << ltc_ltc0_lts0_cbc_base_alignment_shift_v();
2605
2606         /* must be a multiple of 64KB */
2607         compbit_backing_size = roundup(compbit_backing_size, 64*1024);
2608
2609         max_comptag_lines =
2610                 (compbit_backing_size * comptags_per_cacheline) /
2611                 (cacheline_size * slices_per_fbp * gr->num_fbps);
2612
2613         if (max_comptag_lines > hw_max_comptag_lines)
2614                 max_comptag_lines = hw_max_comptag_lines;
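        /*
         * Worked example of the sizing above, with purely illustrative
         * values (the real comptags_per_cacheline, slices_per_fbp and
         * cacheline_size come from ltc_ltcs_ltss_cbc_param_r()): covering
         * 1024 MB gives max_comptag_lines = 1024 << 3 = 8192.  With 32
         * comptags per cacheline, a 1024 B cacheline, 2 slices per FBP and
         * 1 FBP, the raw backing store is DIV_ROUND_UP(8192, 32) * 1024 *
         * 2 * 1 = 512 KB; adding the 2 KB alignment pad and rounding up to
         * a 64 KB multiple gives 576 KB, which in turn covers
         * 576 KB * 32 / (1024 * 2 * 1) = 9216 comptag lines before the
         * hw_max_comptag_lines clamp.
         */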
2615
2616         nvhost_dbg_info("compbit backing store size : %d",
2617                 compbit_backing_size);
2618         nvhost_dbg_info("max comptag lines : %d",
2619                 max_comptag_lines);
2620
2621         gr->compbit_store.mem.ref =
2622                 mem_op().alloc(memmgr, compbit_backing_size,
2623                             DEFAULT_NVMAP_ALLOC_ALIGNMENT,
2624                             DEFAULT_NVMAP_ALLOC_FLAGS,
2625                             NVMAP_HEAP_CARVEOUT_GENERIC);
2626         if (IS_ERR_OR_NULL(gr->compbit_store.mem.ref)) {
2627                 nvhost_err(dev_from_gk20a(g), "failed to allocate "
2628                            "backing store for compbit : size %d",
2629                            compbit_backing_size);
2630                 return -ENOMEM;
2631         }
2632         gr->compbit_store.mem.size = compbit_backing_size;
2633
2634         gr->compbit_store.base_pa =
2635                 mem_op().pin(memmgr, gr->compbit_store.mem.ref);
2636         if (gr->compbit_store.base_pa == -EINVAL ||
2637             gr->compbit_store.base_pa == -EINTR) {
2638                 ret = -ENOMEM;
2639                 goto clean_up;
2640         }
2641
2642         nvhost_allocator_init(&gr->comp_tags, "comptag",
2643                         1, max_comptag_lines, 1);
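        /* the allocator hands out comptag lines starting at index 1;
         * index 0 is presumably reserved so that "comptag 0" can mean
         * "no comptag assigned" for untagged buffers */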
2644
2645 clean_up:
2646         mem_op().put(memmgr, gr->compbit_store.mem.ref);
2647         return ret;
2648 }
2649
2650 int gk20a_gr_clear_comptags(struct gk20a *g, u32 min, u32 max)
2651 {
2652         struct gr_gk20a *gr = &g->gr;
2653         u32 fbp, slice, ctrl1, val;
2654         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
2655         u32 slices_per_fbp =
2656                 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
2657                         gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
2658
2659         nvhost_dbg_fn("");
2660
2661         if (gr->compbit_store.mem.size == 0)
2662                 return 0;
2663
2664         gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
2665                      ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
2666         gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
2667                      ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
2668         gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
2669                      gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) |
2670                      ltc_ltcs_ltss_cbc_ctrl1_clear_active_f());
2671
2672         for (fbp = 0; fbp < gr->num_fbps; fbp++) {
2673                 for (slice = 0; slice < slices_per_fbp; slice++) {
2674                         ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
2675                                 fbp * proj_ltc_pri_stride_v() +
2676                                 slice * proj_lts_pri_stride_v();
2677
2678                         do {
2679                                 u32 check = min_t(u32,
2680                                         GR_IDLE_CHECK_PERIOD, timeout);
2681
2682                                 val = gk20a_readl(g, ctrl1);
2683                                 if (ltc_ltc0_lts0_cbc_ctrl1_clear_v(val) !=
2684                                     ltc_ltc0_lts0_cbc_ctrl1_clear_active_v())
2685                                         break;
2686
2687                                 udelay(GR_IDLE_CHECK_PERIOD);
2688                                 timeout -= check;
2689
2690                         } while (timeout);
2691
2692                         if (timeout == 0) {
2693                                 nvhost_err(dev_from_gk20a(g),
2694                                            "comp tag clear timeout\n");
2695                                 return -EBUSY;
2696                         }
2697                 }
2698         }
2699
2700         return 0;
2701 }
2702
2703 static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr)
2704 {
2705         struct gr_zcull_gk20a *zcull = &gr->zcull;
2706
2707         zcull->aliquot_width = gr->tpc_count * 16;
2708         zcull->aliquot_height = 16;
2709
2710         zcull->width_align_pixels = gr->tpc_count * 16;
2711         zcull->height_align_pixels = 32;
2712
2713         zcull->aliquot_size =
2714                 zcull->aliquot_width * zcull->aliquot_height;
2715
2716         /* assume no floor sweeping since we only have 1 tpc in 1 gpc */
2717         zcull->pixel_squares_by_aliquots =
2718                 gr->zcb_count * 16 * 16 * gr->tpc_count /
2719                 (gr->gpc_count * gr->gpc_tpc_count[0]);
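        /*
         * For the single-GPC, single-TPC case assumed above (gpc_count = 1,
         * tpc_count = 1, gpc_tpc_count[0] = 1) the formulas reduce to:
         * aliquot_width = 16, aliquot_height = 16, aliquot_size = 256 pixels
         * and pixel_squares_by_aliquots = zcb_count * 256.  These numbers
         * only illustrate the arithmetic; they are not extra hw constraints.
         */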
2720
2721         zcull->total_aliquots =
2722                 gr_gpc0_zcull_total_ram_size_num_aliquots_f(
2723                         gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r()));
2724
2725         return 0;
2726 }
2727
2728 u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
2729 {
2730         /* assuming gr has already been initialized */
2731         return gr->ctx_vars.zcull_ctxsw_image_size;
2732 }
2733
2734 int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
2735                         struct channel_gk20a *c, u64 zcull_va, u32 mode)
2736 {
2737         struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx;
2738
2739         zcull_ctx->ctx_sw_mode = mode;
2740         zcull_ctx->gpu_va = zcull_va;
2741
2742         /* TBD: don't disable channel in sw method processing */
2743         return gr_gk20a_ctx_zcull_setup(g, c, true);
2744 }
2745
2746 int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
2747                         struct gr_zcull_info *zcull_params)
2748 {
2749         struct gr_zcull_gk20a *zcull = &gr->zcull;
2750
2751         zcull_params->width_align_pixels = zcull->width_align_pixels;
2752         zcull_params->height_align_pixels = zcull->height_align_pixels;
2753         zcull_params->pixel_squares_by_aliquots =
2754                 zcull->pixel_squares_by_aliquots;
2755         zcull_params->aliquot_total = zcull->total_aliquots;
2756
2757         zcull_params->region_byte_multiplier =
2758                 gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v();
2759         zcull_params->region_header_size =
2760                 proj_scal_litter_num_gpcs_v() *
2761                 gr_zcull_save_restore_header_bytes_per_gpc_v();
2762
2763         zcull_params->subregion_header_size =
2764                 proj_scal_litter_num_gpcs_v() *
2765                 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
2766
2767         zcull_params->subregion_width_align_pixels =
2768                 gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
2769         zcull_params->subregion_height_align_pixels =
2770                 gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
2771         zcull_params->subregion_count = gr_zcull_subregion_qty_v();
2772
2773         return 0;
2774 }
2775
2776 static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
2777                                 struct zbc_entry *color_val, u32 index)
2778 {
2779         struct fifo_gk20a *f = &g->fifo;
2780         struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
2781         u32 i;
2782         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
2783         u32 ret;
2784
2785         ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
2786         if (ret) {
2787                 nvhost_err(dev_from_gk20a(g),
2788                         "failed to disable gr engine activity\n");
2789                 return ret;
2790         }
2791
2792         ret = gr_gk20a_wait_idle(g, &timeout);
2793         if (ret) {
2794                 nvhost_err(dev_from_gk20a(g),
2795                         "failed to idle graphics\n");
2796                 goto clean_up;
2797         }
2798
2799         /* update l2 table */
2800         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
2801                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
2802                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
2803                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(index +
2804                                         GK20A_STARTOF_ZBC_TABLE));
2805
2806         for (i = 0; i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++)
2807                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i),
2808                         color_val->color_l2[i]);
2809
2810         /* update ds table */
2811         gk20a_writel(g, gr_ds_zbc_color_r_r(),
2812                 gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
2813         gk20a_writel(g, gr_ds_zbc_color_g_r(),
2814                 gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
2815         gk20a_writel(g, gr_ds_zbc_color_b_r(),
2816                 gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
2817         gk20a_writel(g, gr_ds_zbc_color_a_r(),
2818                 gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
2819
2820         gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
2821                 gr_ds_zbc_color_fmt_val_f(color_val->format));
2822
2823         gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
2824                 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
2825
2826         /* trigger the write */
2827         gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
2828                 gr_ds_zbc_tbl_ld_select_c_f() |
2829                 gr_ds_zbc_tbl_ld_action_write_f() |
2830                 gr_ds_zbc_tbl_ld_trigger_active_f());
2831
2832         /* update local copy */
2833         for (i = 0; i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) {
2834                 gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
2835                 gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
2836         }
2837         gr->zbc_col_tbl[index].format = color_val->format;
2838         gr->zbc_col_tbl[index].ref_cnt++;
2839
2840 clean_up:
2841         ret = gk20a_fifo_enable_engine_activity(g, gr_info);
2842         if (ret) {
2843                 nvhost_err(dev_from_gk20a(g),
2844                         "failed to enable gr engine activity\n");
2845         }
2846
2847         return ret;
2848 }
2849
2850 static int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
2851                                 struct zbc_entry *depth_val, u32 index)
2852 {
2853         struct fifo_gk20a *f = &g->fifo;
2854         struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
2855         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
2856         u32 ret;
2857
2858         ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
2859         if (ret) {
2860                 nvhost_err(dev_from_gk20a(g),
2861                         "failed to disable gr engine activity\n");
2862                 return ret;
2863         }
2864
2865         ret = gr_gk20a_wait_idle(g, &timeout);
2866         if (ret) {
2867                 nvhost_err(dev_from_gk20a(g),
2868                         "failed to idle graphics\n");
2869                 goto clean_up;
2870         }
2871
2872         /* update l2 table */
2873         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
2874                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
2875                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
2876                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(index +
2877                                         GK20A_STARTOF_ZBC_TABLE));
2878
2879         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(),
2880                         depth_val->depth);
2881
2882         /* update ds table */
2883         gk20a_writel(g, gr_ds_zbc_z_r(),
2884                 gr_ds_zbc_z_val_f(depth_val->depth));
2885
2886         gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
2887                 gr_ds_zbc_z_fmt_val_f(depth_val->format));
2888
2889         gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
2890                 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
2891
2892         /* trigger the write */
2893         gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
2894                 gr_ds_zbc_tbl_ld_select_z_f() |
2895                 gr_ds_zbc_tbl_ld_action_write_f() |
2896                 gr_ds_zbc_tbl_ld_trigger_active_f());
2897
2898         /* update local copy */
2899         gr->zbc_dep_tbl[index].depth = depth_val->depth;
2900         gr->zbc_dep_tbl[index].format = depth_val->format;
2901         gr->zbc_dep_tbl[index].ref_cnt++;
2902
2903 clean_up:
2904         ret = gk20a_fifo_enable_engine_activity(g, gr_info);
2905         if (ret) {
2906                 nvhost_err(dev_from_gk20a(g),
2907                         "failed to enable gr engine activity\n");
2908         }
2909
2910         return ret;
2911 }
2912
2913 int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
2914                      struct zbc_entry *zbc_val)
2915 {
2916         struct zbc_color_table *c_tbl;
2917         struct zbc_depth_table *d_tbl;
2918         u32 i, ret = -ENOMEM;
2919         bool added = false;
2920
2921         /* no endian swap ? */
2922
2923         switch (zbc_val->type) {
2924         case GK20A_ZBC_TYPE_COLOR:
2925                 /* search existing tables */
2926                 for (i = 0; i < gr->max_used_color_index; i++) {
2927
2928                         c_tbl = &gr->zbc_col_tbl[i];
2929
2930                         if (c_tbl->ref_cnt && c_tbl->format == zbc_val->format &&
2931                             memcmp(c_tbl->color_ds, zbc_val->color_ds,
2932                                 sizeof(zbc_val->color_ds)) == 0) {
2933
2934                                 if (memcmp(c_tbl->color_l2, zbc_val->color_l2,
2935                                     sizeof(zbc_val->color_l2))) {
2936                                         nvhost_err(dev_from_gk20a(g),
2937                                                 "zbc l2 and ds color don't match with existing entries");
2938                                         return -EINVAL;
2939                                 }
2940                                 added = true;
2941                                 c_tbl->ref_cnt++;
2942                                 ret = 0;
2943                                 break;
2944                         }
2945                 }
2946                 /* add new table */
2947                 if (!added &&
2948                     gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) {
2949
2950                         c_tbl =
2951                             &gr->zbc_col_tbl[gr->max_used_color_index];
2952                         WARN_ON(c_tbl->ref_cnt != 0);
2953
2954                         ret = gr_gk20a_add_zbc_color(g, gr,
2955                                 zbc_val, gr->max_used_color_index);
2956
2957                         if (!ret)
2958                                 gr->max_used_color_index++;
2959                 }
2960                 break;
2961         case GK20A_ZBC_TYPE_DEPTH:
2962                 /* search existing tables */
2963                 for (i = 0; i < gr->max_used_depth_index; i++) {
2964
2965                         d_tbl = &gr->zbc_dep_tbl[i];
2966
2967                         if (d_tbl->ref_cnt &&
2968                             d_tbl->depth == zbc_val->depth &&
2969                             d_tbl->format == zbc_val->format) {
2970                                 added = true;
2971                                 d_tbl->ref_cnt++;
2972                                 ret = 0;
2973                                 break;
2974                         }
2975                 }
2976                 /* add new table */
2977                 if (!added &&
2978                     gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) {
2979
2980                         d_tbl =
2981                             &gr->zbc_dep_tbl[gr->max_used_depth_index];
2982                         WARN_ON(d_tbl->ref_cnt != 0);
2983
2984                         ret = gr_gk20a_add_zbc_depth(g, gr,
2985                                 zbc_val, gr->max_used_depth_index);
2986
2987                         if (!ret)
2988                                 gr->max_used_depth_index++;
2989                 }
2990                 break;
2991         default:
2992                 nvhost_err(dev_from_gk20a(g),
2993                         "invalid zbc table type %d", zbc_val->type);
2994                 return -EINVAL;
2995         }
2996
2997         if (added && ret == 0) {
2998                 /* update zbc for elpg */
2999         }
3000
3001         return ret;
3002 }
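/*
 * A minimal usage sketch for gr_gk20a_add_zbc() (the field values here are
 * only an example; gr_gk20a_load_zbc_default_table() below shows the real
 * in-tree callers):
 *
 *      struct zbc_entry entry = {
 *              .type   = GK20A_ZBC_TYPE_DEPTH,
 *              .format = gr_ds_zbc_z_fmt_val_fp32_v(),
 *              .depth  = 0x3f800000,   (1.0f as raw IEEE-754 bits)
 *      };
 *      err = gr_gk20a_add_zbc(g, gr, &entry);
 *
 * Re-adding a value/format pair that already exists does not consume a new
 * table slot; it only bumps the matching entry's ref_cnt.
 */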
3003
3004 int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
3005 {
3006         struct fifo_gk20a *f = &g->fifo;
3007         struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
3008         u32 i, j;
3009         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
3010         u32 ret;
3011
3012         ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
3013         if (ret) {
3014                 nvhost_err(dev_from_gk20a(g),
3015                         "failed to disable gr engine activity\n");
3016                 return ret;
3017         }
3018
3019         ret = gr_gk20a_wait_idle(g, &timeout);
3020         if (ret) {
3021                 nvhost_err(dev_from_gk20a(g),
3022                         "failed to idle graphics\n");
3023                 goto clean_up;
3024         }
3025
3026         for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
3027                 gr->zbc_col_tbl[i].format = 0;
3028                 gr->zbc_col_tbl[i].ref_cnt = 0;
3029
3030                 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
3031                         gr_ds_zbc_color_fmt_val_invalid_f());
3032                 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3033                         gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
3034
3035                 /* trigger the write */
3036                 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3037                         gr_ds_zbc_tbl_ld_select_c_f() |
3038                         gr_ds_zbc_tbl_ld_action_write_f() |
3039                         gr_ds_zbc_tbl_ld_trigger_active_f());
3040
3041                 /* clear l2 table */
3042                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
3043                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
3044                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
3045                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(i +
3046                                         GK20A_STARTOF_ZBC_TABLE));
3047
3048                 for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++) {
3049                         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0);
3050                         gr->zbc_col_tbl[i].color_l2[j] = 0;
3051                         gr->zbc_col_tbl[i].color_ds[j] = 0;
3052                 }
3053         }
3054         gr->max_used_color_index = 0;
3055         gr->max_default_color_index = 0;
3056
3057         for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
3058                 gr->zbc_dep_tbl[i].depth = 0;
3059                 gr->zbc_dep_tbl[i].format = 0;
3060                 gr->zbc_dep_tbl[i].ref_cnt = 0;
3061
3062                 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
3063                         gr_ds_zbc_z_fmt_val_invalid_f());
3064                 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3065                         gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
3066
3067                 /* trigger the write */
3068                 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3069                         gr_ds_zbc_tbl_ld_select_z_f() |
3070                         gr_ds_zbc_tbl_ld_action_write_f() |
3071                         gr_ds_zbc_tbl_ld_trigger_active_f());
3072
3073                 /* clear l2 table */
3074                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
3075                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
3076                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
3077                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(i +
3078                                         GK20A_STARTOF_ZBC_TABLE));
3079
3080                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
3081         }
3082         gr->max_used_depth_index = 0;
3083         gr->max_default_depth_index = 0;
3084
3085 clean_up:
3086         ret = gk20a_fifo_enable_engine_activity(g, gr_info);
3087         if (ret) {
3088                 nvhost_err(dev_from_gk20a(g),
3089                         "failed to enable gr engine activity\n");
3090         }
3091
3092         /* elpg stuff */
3093
3094         return ret;
3095 }
3096
3097 /* get a zbc table entry specified by index
3098  * return table size when type is invalid */
3099 int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
3100                         struct zbc_query_params *query_params)
3101 {
3102         u32 index = query_params->index_size;
3103         u32 i;
3104
3105         switch (query_params->type) {
3106         case GK20A_ZBC_TYPE_INVALID:
3107                 query_params->index_size = GK20A_ZBC_TABLE_SIZE;
3108                 break;
3109         case GK20A_ZBC_TYPE_COLOR:
3110                 if (index >= GK20A_ZBC_TABLE_SIZE) {
3111                         nvhost_err(dev_from_gk20a(g),
3112                                 "invalid zbc color table index\n");
3113                         return -EINVAL;
3114                 }
3115                 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3116                         query_params->color_l2[i] =
3117                                 gr->zbc_col_tbl[index].color_l2[i];
3118                         query_params->color_ds[i] =
3119                                 gr->zbc_col_tbl[index].color_ds[i];
3120                 }
3121                 query_params->format = gr->zbc_col_tbl[index].format;
3122                 query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt;
3123                 break;
3124         case GK20A_ZBC_TYPE_DEPTH:
3125                 if (index >= GK20A_ZBC_TABLE_SIZE) {
3126                         nvhost_err(dev_from_gk20a(g),
3127                                 "invalid zbc depth table index\n");
3128                         return -EINVAL;
3129                 }
3130                 query_params->depth = gr->zbc_dep_tbl[index].depth;
3131                 query_params->format = gr->zbc_dep_tbl[index].format;
3132                 query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt;
3133                 break;
3134         default:
3135                 nvhost_err(dev_from_gk20a(g),
3136                                 "invalid zbc table type\n");
3137                 return -EINVAL;
3138         }
3139
3140         return 0;
3141 }
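/*
 * Illustrative only: a caller can first pass type = GK20A_ZBC_TYPE_INVALID
 * to learn the table size, then read individual entries.  Note that
 * index_size doubles as the entry index for color/depth queries:
 *
 *      struct zbc_query_params q = { .type = GK20A_ZBC_TYPE_INVALID };
 *      gr_gk20a_query_zbc(g, gr, &q);     (q.index_size = table size)
 *
 *      q.type = GK20A_ZBC_TYPE_COLOR;
 *      q.index_size = 0;                  (index of the entry to read)
 *      gr_gk20a_query_zbc(g, gr, &q);     (fills color_ds/color_l2/format)
 */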
3142
3143 static int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
3144 {
3145         struct zbc_entry zbc_val;
3146         u32 i, err;
3147
3148         /* load default color table */
3149         zbc_val.type = GK20A_ZBC_TYPE_COLOR;
3150
3151         zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v();
3152         for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3153                 zbc_val.color_ds[i] = 0;
3154                 zbc_val.color_l2[i] = 0;
3155         }
3156         err = gr_gk20a_add_zbc(g, gr, &zbc_val);
3157
3158         zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v();
3159         for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3160                 zbc_val.color_ds[i] = 0xffffffff;
3161                 zbc_val.color_l2[i] = 0x3f800000;
3162         }
3163         err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3164
3165         zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
3166         for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3167                 zbc_val.color_ds[i] = 0;
3168                 zbc_val.color_l2[i] = 0;
3169         }
3170         err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3171
3172         zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
3173         for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3174                 zbc_val.color_ds[i] = 0x3f800000;
3175                 zbc_val.color_l2[i] = 0x3f800000;
3176         }
3177         err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3178
3179         if (!err)
3180                 gr->max_default_color_index = 4;
3181         else {
3182                 nvhost_err(dev_from_gk20a(g),
3183                            "fail to load default zbc color table\n");
3184                 return err;
3185         }
3186
3187         /* load default depth table */
3188         zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
3189
3190         zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
3191         zbc_val.depth = 0;
3192         err = gr_gk20a_add_zbc(g, gr, &zbc_val);
3193
3194         zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
3195         zbc_val.depth = 0x3f800000;
3196         err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3197
3198         if (!err)
3199                 gr->max_default_depth_index = 2;
3200         else {
3201                 nvhost_err(dev_from_gk20a(g),
3202                            "fail to load default zbc depth table\n");
3203                 return err;
3204         }
3205
3206         return 0;
3207 }
3208
3209 static int gr_gk20a_init_zbc(struct gk20a *g, struct gr_gk20a *gr)
3210 {
3211         u32 i, j;
3212
3213         /* reset zbc clear */
3214         for (i = 0; i < GK20A_SIZEOF_ZBC_TABLE -
3215             GK20A_STARTOF_ZBC_TABLE; i++) {
3216                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
3217                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
3218                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
3219                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(
3220                                         i + GK20A_STARTOF_ZBC_TABLE));
3221                 for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++)
3222                         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0);
3223                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
3224         }
3225
3226         gr_gk20a_clear_zbc_table(g, gr);
3227
3228         gr_gk20a_load_zbc_default_table(g, gr);
3229
3230         return 0;
3231 }
3232
3233 static void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine)
3234 {
3235         u32 gate_ctrl, idle_filter;
3236
3237         gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine));
3238
3239         switch (mode) {
3240         case ELCG_RUN:
3241                 gate_ctrl = set_field(gate_ctrl,
3242                                 therm_gate_ctrl_eng_clk_m(),
3243                                 therm_gate_ctrl_eng_clk_run_f());
3244                 gate_ctrl = set_field(gate_ctrl,
3245                                 therm_gate_ctrl_eng_pwr_m(),
3246                                 /* set elpg to auto to meet hw expectation */
3247                                 therm_gate_ctrl_eng_pwr_auto_f());
3248                 break;
3249         case ELCG_STOP:
3250                 gate_ctrl = set_field(gate_ctrl,
3251                                 therm_gate_ctrl_eng_clk_m(),
3252                                 therm_gate_ctrl_eng_clk_stop_f());
3253                 break;
3254         case ELCG_AUTO:
3255                 gate_ctrl = set_field(gate_ctrl,
3256                                 therm_gate_ctrl_eng_clk_m(),
3257                                 therm_gate_ctrl_eng_clk_auto_f());
3258                 break;
3259         default:
3260                 nvhost_err(dev_from_gk20a(g),
3261                         "invalid elcg mode %d", mode);
3262         }
3263
3264         if (tegra_revision == TEGRA_REVISION_SIM) {
3265                 gate_ctrl = set_field(gate_ctrl,
3266                         therm_gate_ctrl_eng_delay_after_m(),
3267                         therm_gate_ctrl_eng_delay_after_f(4));
3268         }
3269
3270         /* 2 * (1 << 5) = 64 clks */
3271         gate_ctrl = set_field(gate_ctrl,
3272                 therm_gate_ctrl_eng_idle_filt_exp_m(),
3273                 therm_gate_ctrl_eng_idle_filt_exp_f(5));
3274         gate_ctrl = set_field(gate_ctrl,
3275                 therm_gate_ctrl_eng_idle_filt_mant_m(),
3276                 therm_gate_ctrl_eng_idle_filt_mant_f(2));
3277         gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl);
3278
3279         /* default fecs_idle_filter to 0 */
3280         idle_filter = gk20a_readl(g, therm_fecs_idle_filter_r());
3281         idle_filter &= ~therm_fecs_idle_filter_value_m();
3282         gk20a_writel(g, therm_fecs_idle_filter_r(), idle_filter);
3283         /* default hubmmu_idle_filter to 0 */
3284         idle_filter = gk20a_readl(g, therm_hubmmu_idle_filter_r());
3285         idle_filter &= ~therm_hubmmu_idle_filter_value_m();
3286         gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter);
3287 }
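/*
 * The engine idle filter (therm_gate_ctrl_eng_idle_filt_*) programmed above
 * uses a mantissa/exponent encoding: filter length = mant * (1 << exp)
 * engine clocks, so mant = 2 and exp = 5 give the 64-clk window noted in
 * the comment.  Doubling the window to 128 clks would mean exp = 6 with the
 * same mantissa (illustrative arithmetic only; 2/5 are the values the
 * driver actually programs).
 */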
3288
3289 static void gr_gk20a_load_gating_prod(struct gk20a *g,
3290                 const struct gating_desc *desc, u32 size, bool prod)
3291 {
3292         u32 i;
3293         for (i = 0; i < size; i++) {
3294                 if (prod)
3295                         gk20a_writel(g, desc[i].addr, desc[i].prod);
3296                 else
3297                         gk20a_writel(g, desc[i].addr, desc[i].disable);
3298         }
3299 }
3300
3301 static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
3302 {
3303         u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
3304         u32 *zcull_map_tiles, *zcull_bank_counters;
3305         u32 map_counter;
3306         u32 rcp_conserv;
3307         u32 offset;
3308         bool floorsweep = false;
3309
3310         if (!gr->map_tiles)
3311                 return -1;
3312
3313         zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() *
3314                         proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
3315         zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() *
3316                         proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
3317
3318         if (!zcull_map_tiles || !zcull_bank_counters) {
3319                 nvhost_err(dev_from_gk20a(g),
3320                         "failed to allocate zcull temp buffers");
3321                 return -ENOMEM;
3322         }
3323
3324         for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
3325                 zcull_map_tiles[map_counter] =
3326                         zcull_bank_counters[gr->map_tiles[map_counter]];
3327                 zcull_bank_counters[gr->map_tiles[map_counter]]++;
3328         }
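        /*
         * The loop above turns the GPC-id-per-tile table into a per-GPC
         * running SM index.  Illustrative example: if gr->map_tiles were
         * { 0, 1, 0, 1, 0 }, zcull_bank_counters would count occurrences
         * per GPC and zcull_map_tiles would come out as { 0, 0, 1, 1, 2 },
         * i.e. the n-th tile owned by a given GPC gets the value n.
         */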
3329
3330         gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(),
3331                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) |
3332                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) |
3333                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) |
3334                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) |
3335                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) |
3336                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) |
3337                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) |
3338                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7]));
3339
3340         gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(),
3341                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) |
3342                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) |
3343                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) |
3344                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) |
3345                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) |
3346                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) |
3347                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) |
3348                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15]));
3349
3350         gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(),
3351                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) |
3352                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) |
3353                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) |
3354                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) |
3355                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) |
3356                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) |
3357                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) |
3358                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23]));
3359
3360         gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(),
3361                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) |
3362                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) |
3363                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) |
3364                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) |
3365                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) |
3366                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) |
3367                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) |
3368                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31]));
3369
3370         kfree(zcull_map_tiles);
3371         kfree(zcull_bank_counters);
3372
3373         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3374                 gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
3375                 gpc_zcull_count = gr->gpc_zcb_count[gpc_index];
3376
3377                 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
3378                     gpc_zcull_count < gpc_tpc_count) {
3379                         nvhost_err(dev_from_gk20a(g),
3380                                 "zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
3381                                 gpc_zcull_count, gpc_tpc_count, gpc_index);
3382                         return -EINVAL;
3383                 }
3384                 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
3385                     gpc_zcull_count != 0)
3386                         floorsweep = true;
3387         }
3388
3389         /* 1.0f / 1.0f * gr_gpc0_zcull_sm_num_rcp_conservative__max_v() */
3390         rcp_conserv = gr_gpc0_zcull_sm_num_rcp_conservative__max_v();
3391
3392         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3393                 offset = gpc_index * proj_gpc_stride_v();
3394
3395                 if (floorsweep) {
3396                         gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
3397                                 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
3398                                 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
3399                                         gr->max_zcull_per_gpc_count));
3400                 } else {
3401                         gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
3402                                 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
3403                                 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
3404                                         gr->gpc_tpc_count[gpc_index]));
3405                 }
3406
3407                 gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
3408                         gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) |
3409                         gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count));
3410
3411                 gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
3412                         gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
3413         }
3414
3415         gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
3416                 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
3417
3418         return 0;
3419 }
3420
3421 static int gk20a_init_gr_setup_hw(struct gk20a *g)
3422 {
3423         struct gr_gk20a *gr = &g->gr;
3424         struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
3425         struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init;
3426         struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
3427         u32 data;
3428         u32 addr_lo, addr_hi, addr;
3429         u32 compbit_base_post_divide;
3430         u32 compbit_base_post_multiply;
3431         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
3432         u32 fe_go_idle_timeout_save;
3433         u32 last_bundle_data = 0;
3434         u32 last_method_data = 0;
3435         u32 i, err;
3436
3437         nvhost_dbg_fn("");
3438
3439         /* slcg prod values */
3440         gr_gk20a_load_gating_prod(g, gk20a_slcg_gr,
3441                 sizeof(gk20a_slcg_gr)/sizeof(struct gating_desc), true);
3442         gr_gk20a_load_gating_prod(g, gk20a_slcg_perf,
3443                 sizeof(gk20a_slcg_perf)/sizeof(struct gating_desc), true);
3444
3445         /* init mmu debug buffer */
3446         addr_lo = u64_lo32(gr->mmu_wr_mem.cpu_pa);
3447         addr_hi = u64_hi32(gr->mmu_wr_mem.cpu_pa);
3448         addr = (addr_lo >> fb_mmu_debug_wr_addr_alignment_v()) |
3449                 (addr_hi << (32 - fb_mmu_debug_wr_addr_alignment_v()));
3450
3451         gk20a_writel(g, fb_mmu_debug_wr_r(),
3452                      fb_mmu_debug_wr_aperture_vid_mem_f() |
3453                      fb_mmu_debug_wr_vol_false_f() |
3454                      fb_mmu_debug_wr_addr_v(addr));
3455
3456         addr_lo = u64_lo32(gr->mmu_rd_mem.cpu_pa);
3457         addr_hi = u64_hi32(gr->mmu_rd_mem.cpu_pa);
3458         addr = (addr_lo >> fb_mmu_debug_rd_addr_alignment_v()) |
3459                 (addr_hi << (32 - fb_mmu_debug_rd_addr_alignment_v()));
3460
3461         gk20a_writel(g, fb_mmu_debug_rd_r(),
3462                      fb_mmu_debug_rd_aperture_vid_mem_f() |
3463                      fb_mmu_debug_rd_vol_false_f() |
3464                      fb_mmu_debug_rd_addr_v(addr));
3465
3466         /* load gr floorsweeping registers */
3467         data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r());
3468         data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(),
3469                         gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f());
3470         gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data);
3471
3472         gr_gk20a_zcull_init_hw(g, gr);
3473
3474         gr_gk20a_load_gating_prod(g, gk20a_blcg_gr,
3475                 sizeof(gk20a_blcg_gr)/sizeof(struct gating_desc), true);
3476         gr_gk20a_load_gating_prod(g, gk20a_pg_gr,
3477                 sizeof(gk20a_pg_gr)/sizeof(struct gating_desc), true);
3478
3479         gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
3480         gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
3481
3482         /* enable fifo access */
3483         gk20a_writel(g, gr_gpfifo_ctl_r(),
3484                      gr_gpfifo_ctl_access_enabled_f() |
3485                      gr_gpfifo_ctl_semaphore_access_enabled_f());
3486
3487         /* TBD: reload gr ucode when needed */
3488
3489         /* enable interrupts */
3490         gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF);
3491         gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF);
3492
3493         /* enable fecs error interrupts */
3494         gk20a_writel(g, gr_fecs_host_int_enable_r(),
3495                      gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() |
3496                      gr_fecs_host_int_enable_umimp_firmware_method_enable_f() |
3497                      gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
3498                      gr_fecs_host_int_enable_watchdog_enable_f());
3499
3500         /* enable exceptions */
3501         gk20a_writel(g, gr_fe_hww_esr_r(),
3502                      gr_fe_hww_esr_en_enable_f() |
3503                      gr_fe_hww_esr_reset_active_f());
3504         gk20a_writel(g, gr_memfmt_hww_esr_r(),
3505                      gr_memfmt_hww_esr_en_enable_f() |
3506                      gr_memfmt_hww_esr_reset_active_f());
3507         gk20a_writel(g, gr_scc_hww_esr_r(),
3508                      gr_scc_hww_esr_en_enable_f() |
3509                      gr_scc_hww_esr_reset_active_f());
3510         gk20a_writel(g, gr_mme_hww_esr_r(),
3511                      gr_mme_hww_esr_en_enable_f() |
3512                      gr_mme_hww_esr_reset_active_f());
3513         gk20a_writel(g, gr_pd_hww_esr_r(),
3514                      gr_pd_hww_esr_en_enable_f() |
3515                      gr_pd_hww_esr_reset_active_f());
3516         gk20a_writel(g, gr_sked_hww_esr_r(), /* enabled by default */
3517                      gr_sked_hww_esr_reset_active_f());
3518         gk20a_writel(g, gr_ds_hww_esr_r(),
3519                      gr_ds_hww_esr_en_enabled_f() |
3520                      gr_ds_hww_esr_reset_task_f());
3521         gk20a_writel(g, gr_ds_hww_report_mask_r(),
3522                      gr_ds_hww_report_mask_sph0_err_report_f() |
3523                      gr_ds_hww_report_mask_sph1_err_report_f() |
3524                      gr_ds_hww_report_mask_sph2_err_report_f() |
3525                      gr_ds_hww_report_mask_sph3_err_report_f() |
3526                      gr_ds_hww_report_mask_sph4_err_report_f() |
3527                      gr_ds_hww_report_mask_sph5_err_report_f() |
3528                      gr_ds_hww_report_mask_sph6_err_report_f() |
3529                      gr_ds_hww_report_mask_sph7_err_report_f() |
3530                      gr_ds_hww_report_mask_sph8_err_report_f() |
3531                      gr_ds_hww_report_mask_sph9_err_report_f() |
3532                      gr_ds_hww_report_mask_sph10_err_report_f() |
3533                      gr_ds_hww_report_mask_sph11_err_report_f() |
3534                      gr_ds_hww_report_mask_sph12_err_report_f() |
3535                      gr_ds_hww_report_mask_sph13_err_report_f() |
3536                      gr_ds_hww_report_mask_sph14_err_report_f() |
3537                      gr_ds_hww_report_mask_sph15_err_report_f() |
3538                      gr_ds_hww_report_mask_sph16_err_report_f() |
3539                      gr_ds_hww_report_mask_sph17_err_report_f() |
3540                      gr_ds_hww_report_mask_sph18_err_report_f() |
3541                      gr_ds_hww_report_mask_sph19_err_report_f() |
3542                      gr_ds_hww_report_mask_sph20_err_report_f() |
3543                      gr_ds_hww_report_mask_sph21_err_report_f() |
3544                      gr_ds_hww_report_mask_sph22_err_report_f() |
3545                      gr_ds_hww_report_mask_sph23_err_report_f());
3546
3547         /* TBD: ECC for L1/SM */
3548         /* TBD: enable per GPC exceptions */
3549         /* TBD: enable per BE exceptions */
3550
3551         /* reset and enable all exceptions */
3552         gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF);
3553         gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF);
3554         gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF);
3555         gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF);
3556         gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF);
3557         gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF);
3558
3559         /* ignore status from some units */
3560         data = gk20a_readl(g, gr_status_mask_r());
3561         gk20a_writel(g, gr_status_mask_r(), data & gr->status_disable_mask);
3562
3563         gr_gk20a_init_zbc(g, gr);
3564
3565         compbit_base_post_divide = u64_lo32(
3566                 (gr->compbit_store.base_pa >>
3567                         ltc_ltc0_lts0_cbc_base_alignment_shift_v()) /
3568                         gr->num_fbps);
3569
3570         compbit_base_post_multiply = ((u64)compbit_base_post_divide *
3571                 gr->num_fbps) << ltc_ltc0_lts0_cbc_base_alignment_shift_v();
3572
3573         if (compbit_base_post_multiply < gr->compbit_store.base_pa)
3574                 compbit_base_post_divide++;
3575
3576         gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(),
3577                 compbit_base_post_divide);
3578
3579         /* load ctx init */
3580         for (i = 0; i < sw_ctx_load->count; i++)
3581                 gk20a_writel(g, sw_ctx_load->l[i].addr,
3582                              sw_ctx_load->l[i].value);
3583
3584         /* TBD: add gr ctx overrides */
3585
3586         err = gr_gk20a_wait_idle(g, &timeout);
3587         if (err)
3588                 goto out;
3589
3590         /* save and disable fe_go_idle */
3591         fe_go_idle_timeout_save =
3592                 gk20a_readl(g, gr_fe_go_idle_timeout_r());
3593         gk20a_writel(g, gr_fe_go_idle_timeout_r(),
3594                 (fe_go_idle_timeout_save & gr_fe_go_idle_timeout_count_f(0)) |
3595                 gr_fe_go_idle_timeout_count_disabled_f());
3596
3597         /* override a few ctx state registers */
3598         gr_gk20a_commit_global_cb_manager(g, NULL, 0);
3599         gr_gk20a_commit_global_timeslice(g, NULL, 0);
3600
3601         /* floorsweep anything left */
3602         gr_gk20a_ctx_state_floorsweep(g);
3603
3604         err = gr_gk20a_wait_idle(g, &timeout);
3605         if (err)
3606                 goto restore_fe_go_idle;
3607
3608         /* enable pipe mode override */
3609         gk20a_writel(g, gr_pipe_bundle_config_r(),
3610                 gr_pipe_bundle_config_override_pipe_mode_enabled_f());
3611
3612         /* load bundle init */
3613         err = 0;
3614         for (i = 0; i < sw_bundle_init->count; i++) {
3615
3616                 if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) {
3617                         gk20a_writel(g, gr_pipe_bundle_data_r(),
3618                                 sw_bundle_init->l[i].value);
3619                         last_bundle_data = sw_bundle_init->l[i].value;
3620                 }
3621
3622                 gk20a_writel(g, gr_pipe_bundle_address_r(),
3623                              sw_bundle_init->l[i].addr);
3624
3625                 if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
3626                     GR_GO_IDLE_BUNDLE)
3627                         err |= gr_gk20a_wait_idle(g, &timeout);
3628                 else if (0) { /* IS_SILICON */
3629                         do {
3630                                 u32 gr_status = gk20a_readl(g, gr_status_r());
3631                                 u32 check = min_t(u32, GR_IDLE_CHECK_PERIOD,
3632                                                   timeout);
3633
3634                                 if (gr_status_fe_method_lower_v(gr_status) ==
3635                                     gr_status_fe_method_lower_idle_v())
3636                                         break;
3637
3638                                 udelay(GR_IDLE_CHECK_PERIOD);
3639
3640                                 timeout -= check;
3641                         } while (timeout);
3642                 }
3643         }
3644
3645         /* disable pipe mode override */
3646         gk20a_writel(g, gr_pipe_bundle_config_r(),
3647                      gr_pipe_bundle_config_override_pipe_mode_disabled_f());
3648
3649 restore_fe_go_idle:
3650         /* restore fe_go_idle */
3651         gk20a_writel(g, gr_fe_go_idle_timeout_r(), fe_go_idle_timeout_save);
3652
3653         if (err || gr_gk20a_wait_idle(g, &timeout))
3654                 goto out;
3655
3656         /* load method init */
3657         if (sw_method_init->count) {
3658                 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
3659                              sw_method_init->l[0].value);
3660                 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
3661                              gr_pri_mme_shadow_raw_index_write_trigger_f() |
3662                              sw_method_init->l[0].addr);
3663                 last_method_data = sw_method_init->l[0].value;
3664         }
3665         for (i = 1; i < sw_method_init->count; i++) {
3666                 if (sw_method_init->l[i].value != last_method_data) {
3667                         gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
3668                                 sw_method_init->l[i].value);
3669                         last_method_data = sw_method_init->l[i].value;
3670                 }
3671                 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
3672                         gr_pri_mme_shadow_raw_index_write_trigger_f() |
3673                         sw_method_init->l[i].addr);
3674         }
3675
3676         err = gr_gk20a_wait_idle(g, &timeout);
3677         if (err)
3678                 goto out;
3679
3680 out:
3681         nvhost_dbg_fn("done");
3682         return err;
3683 }
3684
3685 static int gk20a_init_gr_prepare(struct gk20a *g)
3686 {
3687         u32 gpfifo_ctrl, pmc_en;
3688         u32 err = 0;
3689
3690         /* disable fifo access */
3691         gpfifo_ctrl = gk20a_readl(g, gr_gpfifo_ctl_r());
3692         gpfifo_ctrl &= ~gr_gpfifo_ctl_access_enabled_f();
3693         gk20a_writel(g, gr_gpfifo_ctl_r(), gpfifo_ctrl);
3694
3695         /* reset gr engine */
3696         pmc_en = gk20a_readl(g, mc_enable_r());
3697         pmc_en &= ~mc_enable_pgraph_enabled_f();
3698         pmc_en &= ~mc_enable_blg_enabled_f();
3699         pmc_en &= ~mc_enable_perfmon_enabled_f();
3700         gk20a_writel(g, mc_enable_r(), pmc_en);
3701
3702         pmc_en = gk20a_readl(g, mc_enable_r());
3703         pmc_en |= mc_enable_pgraph_enabled_f();
3704         pmc_en |= mc_enable_blg_enabled_f();
3705         pmc_en |= mc_enable_perfmon_enabled_f();
3706         gk20a_writel(g, mc_enable_r(), pmc_en);
3707         pmc_en = gk20a_readl(g, mc_enable_r());
3708
3709         /* enable fifo access */
3710         gk20a_writel(g, gr_gpfifo_ctl_r(),
3711                 gr_gpfifo_ctl_access_enabled_f() |
3712                 gr_gpfifo_ctl_semaphore_access_enabled_f());
3713
3714         if (!g->gr.ctx_vars.valid) {
3715                 err = gr_gk20a_init_ctx_vars(g, &g->gr);
3716                 if (err)
3717                         nvhost_err(dev_from_gk20a(g),
3718                                 "fail to load gr init ctx");
3719         }
3720
3721         return err;
3722 }
3723
3724 static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
3725 {
3726         struct gr_gk20a *gr = &g->gr;
3727         struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load;
3728         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
3729         u32 i, err = 0;
3730
3731         nvhost_dbg_fn("");
3732
3733         /* enable interrupts */
3734         gk20a_writel(g, gr_intr_r(), ~0);
3735         gk20a_writel(g, gr_intr_en_r(), ~0);
3736
3737         /* reset ctx switch state */
3738         gr_gk20a_ctx_reset(g, 0);
3739
3740         /* clear scc ram */
3741         gk20a_writel(g, gr_scc_init_r(),
3742                 gr_scc_init_ram_trigger_f());
3743
3744         /* load non_ctx init */
3745         for (i = 0; i < sw_non_ctx_load->count; i++)
3746                 gk20a_writel(g, sw_non_ctx_load->l[i].addr,
3747                         sw_non_ctx_load->l[i].value);
3748
3749         err = gr_gk20a_wait_idle(g, &timeout);
3750         if (err)
3751                 goto out;
3752
3753         err = gr_gk20a_load_ctxsw_ucode(g, gr);
3754         if (err)
3755                 goto out;
3756
3757 out:
3758         if (err)
3759                 nvhost_dbg(dbg_fn | dbg_err, "fail");
3760         else
3761                 nvhost_dbg_fn("done");
3762
3763         return err;
3764 }
3765
3766 static int gk20a_init_gr_setup_sw(struct gk20a *g, bool reinit)
3767 {
3768         struct gr_gk20a *gr = &g->gr;
3769         int err;
3770
3771         nvhost_dbg_fn("");
3772
3773         if (reinit) {
3774                 nvhost_dbg_fn("skip init");
3775                 return 0;
3776         }
3777
3778         gr->g = g;
3779
3780         err = gr_gk20a_init_gr_config(g, gr);
3781         if (err)
3782                 goto clean_up;
3783
3784         err = gr_gk20a_init_mmu_sw(g, gr);
3785         if (err)
3786                 goto clean_up;
3787
3788         err = gr_gk20a_init_map_tiles(g, gr);
3789         if (err)
3790                 goto clean_up;
3791
3792 #if CONFIG_GK20A_SIM
3793         gr->max_comptag_mem = 1; /* MBs worth of comptag coverage */
3794 #else
3795         nvhost_dbg_info("total ram pages : %lu", totalram_pages);
3796         gr->max_comptag_mem = totalram_pages >> (10 - (PAGE_SHIFT - 10));
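        /* with the usual 4 KB pages (PAGE_SHIFT = 12) the shift above is
         * 10 - (12 - 10) = 8, i.e. totalram_pages >> 8 = total RAM in MB,
         * so the comptag store is sized to cover all of system memory */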
3797 #endif
3798         err = gr_gk20a_init_comptag(g, gr);
3799         if (err)
3800                 goto clean_up;
3801
3802         err = gr_gk20a_init_zcull(g, gr);
3803         if (err)
3804                 goto clean_up;
3805
3806         err = gr_gk20a_init_ctx_state(g, gr);
3807         if (err)
3808                 goto clean_up;
3809
3810         err = gr_gk20a_alloc_global_ctx_buffers(g);
3811         if (err)
3812                 goto clean_up;
3813
3814         gr->remove_support = gk20a_remove_gr_support;
3815         nvhost_dbg_fn("done");
3816         return 0;
3817
3818 clean_up:
3819         nvhost_dbg(dbg_fn | dbg_err, "fail");
3820         gk20a_remove_gr_support(g, gr);
3821         return err;
3822 }
3823
3824 int gk20a_init_gr_support(struct gk20a *g, bool reinit)
3825 {
3826         struct gr_gk20a *gr = &g->gr;
3827         u32 err;
3828
3829         if (gr->initialized)
3830                 return 0;
3831
3832         err = gk20a_init_gr_prepare(g);
3833         if (err)
3834                 return err;
3835
3836         err = gk20a_init_gr_reset_enable_hw(g);
3837         if (err)
3838                 return err;
3839
3840         err = gk20a_init_gr_setup_sw(g, false);
3841         if (err)
3842                 return err;
3843
3844         err = gk20a_init_gr_setup_hw(g);
3845         if (err)
3846                 return err;
3847
3848         gr->initialized = true;
3849
3850         return 0;
3851 }
3852
3853 #define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE   0x02dc
3854 #define NVA297_SET_CIRCULAR_BUFFER_SIZE         0x1280
3855 #define NVA297_SET_SHADER_EXCEPTIONS            0x1528
3856
3857 #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
3858
3859 struct gr_isr_data {
3860         u32 addr;
3861         u32 data_lo;
3862         u32 data_hi;
3863         u32 curr_ctx;
3864         u32 chid;
3865         u32 offset;
3866         u32 sub_chan;
3867         u32 class_num;
3868 };
3869
3870 static void gk20a_gr_set_shader_exceptions(struct gk20a *g,
3871                                            struct gr_isr_data *isr_data)
3872 {
3873         u32 val;
3874
3875         nvhost_dbg_fn("");
3876
3877         if (isr_data->data_lo ==
3878             NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE)
3879                 val = 0;
3880         else
3881                 val = ~0;
3882
3883         gk20a_writel(g,
3884                 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
3885                 val);
3886         gk20a_writel(g,
3887                 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
3888                 val);
3889 }
3890
3891 static void gk20a_gr_set_circular_buffer_size(struct gk20a *g,
3892                         struct gr_isr_data *isr_data)
3893 {
3894         struct gr_gk20a *gr = &g->gr;
3895         u32 gpc_index, ppc_index, stride, val, offset;
3896         u32 cb_size = isr_data->data_lo * 4;
3897
3898         nvhost_dbg_fn("");
3899
3900         if (cb_size > gr->attrib_cb_size)
3901                 cb_size = gr->attrib_cb_size;
3902
3903         gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
3904                 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
3905                  ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
3906                  gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
3907
3908         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3909                 stride = proj_gpc_stride_v() * gpc_index;
3910
3911                 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
3912                         ppc_index++) {
3913
3914                         val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() +
3915                                 stride +
3916                                 proj_ppc_in_gpc_stride_v() * ppc_index);
3917
3918                         offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val);
3919
3920                         val = set_field(val,
3921                                 gr_gpc0_ppc0_cbm_cfg_size_m(),
3922                                 gr_gpc0_ppc0_cbm_cfg_size_f(cb_size *
3923                                         gr->pes_tpc_count[ppc_index][gpc_index]));
3924                         val = set_field(val,
3925                                 gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
3926                                 (offset + 1));
3927
3928                         gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
3929                                 stride +
3930                                 proj_ppc_in_gpc_stride_v() * ppc_index, val);
3931
3932                         val = set_field(val,
3933                                 gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
3934                                 offset);
3935
3936                         gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
3937                                 stride +
3938                                 proj_ppc_in_gpc_stride_v() * ppc_index, val);
3939                 }
3940         }
3941 }
3942
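/*
 * NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE: same flow as the beta CB above
 * but for the alpha CB: clamp to gr->alpha_cb_size, update the DS alpha
 * constraint and the PD max_output limit, then program cbm_cfg2 per PPC.
 */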
3943 static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g,
3944                                                 struct gr_isr_data *isr_data)
3945 {
3946         struct gr_gk20a *gr = &g->gr;
3947         u32 gpc_index, ppc_index, stride, val;
3948         u32 pd_ab_max_output;
3949         u32 alpha_cb_size = isr_data->data_lo * 4;
3950
3951         nvhost_dbg_fn("");
3952         /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
3953                 return; */
3954
3955         if (alpha_cb_size > gr->alpha_cb_size)
3956                 alpha_cb_size = gr->alpha_cb_size;
3957
3958         gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
3959                 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
3960                  ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
3961                  gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
3962
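        /*
         * Convert the alpha CB size from CBM size-granularity units into
         * pd_ab_dist_cfg1 max_output granules before programming the limit.
         */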
3963         pd_ab_max_output = alpha_cb_size *
3964                 gr_gpc0_ppc0_cbm_cfg_size_granularity_v() /
3965                 gr_pd_ab_dist_cfg1_max_output_granularity_v();
3966
3967         gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
3968                 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));
3969
3970         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3971                 stride = proj_gpc_stride_v() * gpc_index;
3972
3973                 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
3974                         ppc_index++) {
3975
3976                         val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() +
3977                                 stride +
3978                                 proj_ppc_in_gpc_stride_v() * ppc_index);
3979
3980                         val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(),
3981                                         gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size *
3982                                                 gr->pes_tpc_count[ppc_index][gpc_index]));
3983
3984                         gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() +
3985                                 stride +
3986                                 proj_ppc_in_gpc_stride_v() * ppc_index, val);
3987                 }
3988         }
3989 }
3990
3991 static int gk20a_gr_handle_illegal_method(struct gk20a *g,
3992                                           struct gr_isr_data *isr_data)
3993 {
3994         nvhost_dbg_fn("");
3995
3996         if (isr_data->class_num == KEPLER_C) {
3997                 switch (isr_data->offset << 2) {
3998                 case NVA297_SET_SHADER_EXCEPTIONS:
3999                         gk20a_gr_set_shader_exceptions(g, isr_data);
4000                         break;
4001                 case NVA297_SET_CIRCULAR_BUFFER_SIZE:
4002                         gk20a_gr_set_circular_buffer_size(g, isr_data);
4003                         break;
4004                 case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
4005                         gk20a_gr_set_alpha_circular_buffer_size(g, isr_data);
4006                         break;
4007                 default:
4008                         nvhost_err(dev_from_gk20a(g),
4009                                    "unsupported KEPLER_C method: class 0x%08x, offset 0x%08x",
4010                                    isr_data->class_num, isr_data->offset);
4011                         return -EINVAL;
4012                 }
4013                 return 0;
4014         }
4015
4016         nvhost_err(dev_from_gk20a(g),
4017                    "invalid method class 0x%08x, offset 0x%08x",
4018                    isr_data->class_num, isr_data->offset);
4019         return -EINVAL;
4020 }
4021
4022 static int gk20a_gr_handle_notify_pending(struct gk20a *g,
4023                                           struct gr_isr_data *isr_data)
4024 {
4025         struct fifo_gk20a *f = &g->fifo;
4026         struct channel_gk20a *ch = &f->channel[isr_data->chid];
4027
4028         nvhost_dbg_fn("");
4029
4030         wake_up(&ch->notifier_wq);
4031
4032         return 0;
4033 }
4034
4035 /* Used by the sw interrupt thread to translate the current ctx to a chid.
4036  * For performance we don't want to scan every channel on each interrupt;
4037  * a small TLB is used here to cache recent translations. */
4038 static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
4039 {
4040         struct fifo_gk20a *f = &g->fifo;
4041         struct gr_gk20a *gr = &g->gr;
4042         u32 chid;
4043         u32 i;
4044
4045         /* check cache first */
4046         for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++)
4047                 if (gr->chid_tlb[i].curr_ctx == curr_ctx)
4048                         return gr->chid_tlb[i].hw_chid;
4049
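        /*
         * The value from gr_fecs_current_ctx_r() is the channel's instance
         * block base shifted down by ram_in_base_shift, so shift it back up
         * before comparing against each channel's inst block address.
         */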
4050         /* slow path */
4051         for (chid = 0; chid < f->num_channels; chid++)
4052                 if (f->channel[chid].inst_block.cpu_pa ==
4053                     curr_ctx << ram_in_base_shift_v())
4054                         break;
4055
4056         if (chid >= f->num_channels)
4057                 return -1;
4058
4059         /* add to free tlb entry */
4060         for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++)
4061                 if (gr->chid_tlb[i].curr_ctx == 0) {
4062                         gr->chid_tlb[i].curr_ctx = curr_ctx;
4063                         gr->chid_tlb[i].hw_chid = chid;
4064                         return chid;
4065                 }
4066
4067         /* no free entry: evict one round-robin (TLB size must be a power of two) */
4068         gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
4069         gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid;
4070
4071         gr->channel_tlb_flush_index =
4072                 (gr->channel_tlb_flush_index + 1) &
4073                 (GR_CHANNEL_MAP_TLB_SIZE - 1);
4074
4075         return chid;
4076 }
4077
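/*
 * Graphics interrupt service: snapshot the trapped-method registers,
 * resolve the owning channel from the current FECS context, dispatch the
 * notify and illegal-method handlers, acknowledge each condition that was
 * handled and warn about anything left pending.
 */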
4078 void gk20a_gr_isr(struct gk20a *g)
4079 {
4080         struct gr_isr_data isr_data;
4081         u32 grfifo_ctl;
4082         u32 obj_table;
4083         int ret;
4084         u32 gr_intr = gk20a_readl(g, gr_intr_r());
4085
4086         nvhost_dbg_fn("");
4087
4088         if (!gr_intr)
4089                 return;
4090
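        /*
         * Block gpfifo and semaphore access while the trapped state is read
         * out and serviced; both are re-enabled at clean_up.
         */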
4091         grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
4092         grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
4093         grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
4094
4095         gk20a_writel(g, gr_gpfifo_ctl_r(),
4096                 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
4097                 gr_gpfifo_ctl_semaphore_access_f(0));
4098
4099         isr_data.addr = gk20a_readl(g, gr_trapped_addr_r());
4100         isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r());
4101         isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r());
4102         isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
4103         isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr);
4104         isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr);
4105         obj_table = gk20a_readl(g,
4106                 gr_fe_object_table_r(isr_data.sub_chan));
4107         isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
4108
4109         isr_data.chid =
4110                 gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx);
4111         if (isr_data.chid == (u32)-1) {
4112                 nvhost_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
4113                            isr_data.curr_ctx);
4114                 goto clean_up;
4115         }
4116
4117         nvhost_dbg(dbg_intr, "channel %d: addr 0x%08x, "
4118                 "data 0x%08x 0x%08x,"
4119                 "ctx 0x%08x, offset 0x%08x, "
4120                 "subchannel 0x%08x, class 0x%08x",
4121                 isr_data.chid, isr_data.addr,
4122                 isr_data.data_hi, isr_data.data_lo,
4123                 isr_data.curr_ctx, isr_data.offset,
4124                 isr_data.sub_chan, isr_data.class_num);
4125
4126         if (gr_intr & gr_intr_notify_pending_f()) {
4127                 gk20a_gr_handle_notify_pending(g, &isr_data);
4128                 gk20a_writel(g, gr_intr_r(),
4129                         gr_intr_notify_reset_f());
4130                 gr_intr &= ~gr_intr_notify_pending_f();
4131         }
4132
4133         if (gr_intr & gr_intr_illegal_method_pending_f()) {
4134                 ret = gk20a_gr_handle_illegal_method(g, &isr_data);
4135                 if (!ret) {
4136                         gk20a_writel(g, gr_intr_r(),
4137                                 gr_intr_illegal_method_reset_f());
4138                         gr_intr &= ~gr_intr_illegal_method_pending_f();
4139                 }
4140         }
4141
4142 clean_up:
4143         gk20a_writel(g, gr_gpfifo_ctl_r(),
4144                 grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
4145                 gr_gpfifo_ctl_semaphore_access_f(1));
4146
4147         if (gr_intr)
4148                 nvhost_err(dev_from_gk20a(g),
4149                            "unhandled gr interrupt 0x%08x", gr_intr);
4150 }
4151
4152 int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
4153 {
4154         BUG_ON(size == NULL);
4155         return gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 1,
4156                 gr_fecs_method_push_adr_discover_reglist_image_size_v(),
4157                 size, GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
4158 }
4159
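/*
 * Bind the reglist to a channel: the method data is the instance block
 * address as a 4KB-aligned pointer (addr >> 12) with the valid bit and
 * vidmem target set, in the same format as gr_fecs_current_ctx.
 */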
4160 int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
4161 {
4162         return gr_gk20a_submit_fecs_method(g, 4,
4163                 gr_fecs_current_ctx_ptr_f(addr >> 12) |
4164                 gr_fecs_current_ctx_valid_f(1) | gr_fecs_current_ctx_target_vid_mem_f(),
4165                 ~0, 1, gr_fecs_method_push_adr_set_reglist_bind_instance_f(),
4166                 0, GR_IS_UCODE_OP_EQUAL, 1, GR_IS_UCODE_OP_SKIP, 0);
4167 }
4168
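/*
 * The reglist virtual address is handed to FECS in 256-byte units; only
 * the low 32 bits of (pmu_va >> 8) are sent as the method data.
 */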
4169 int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va)
4170 {
4171         return gr_gk20a_submit_fecs_method(g, 4, u64_lo32(pmu_va >> 8),
4172                 ~0, 1, gr_fecs_method_push_adr_set_reglist_virtual_address_f(),
4173                 0, GR_IS_UCODE_OP_EQUAL, 1, GR_IS_UCODE_OP_SKIP, 0);
4174 }