1 /*
2  * drivers/video/tegra/host/gk20a/gr_gk20a.c
3  *
4  * GK20A Graphics
5  *
6  * Copyright (c) 2011, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>        /* for udelay */
23 #include <linux/mm.h>           /* for totalram_pages */
24 #include <linux/nvmap.h>
25
26 #include "../dev.h"
27
28 #include "gk20a.h"
29 #include "gr_ctx_gk20a.h"
30
31 #include "hw_ccsr_gk20a.h"
32 #include "hw_ctxsw_prog_gk20a.h"
33 #include "hw_gr_gk20a.h"
34 #include "hw_mc_gk20a.h"
35 #include "hw_ram_gk20a.h"
36 #include "hw_pri_ringmaster_gk20a.h"
37 #include "hw_proj_gk20a.h"
38 #include "hw_top_gk20a.h"
39 #include "hw_ltc_gk20a.h"
40 #include "hw_fb_gk20a.h"
41 #include "hw_therm_gk20a.h"
42 #include "gk20a_gating_reglist.h"
43 #include "chip_support.h"
44 #include "nvhost_memmgr.h"
45
46 static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
47 static int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_gk20a *c,
48                                     u32 addr, u32 data, u32 patch);
49
50 /* global ctx buffer */
51 static int  gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
52 static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g);
53 static int  gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
54                                             struct channel_gk20a *c);
55 static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c);
56
57 /* channel gr ctx buffer */
58 static int  gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
59                                         struct channel_gk20a *c);
60 static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);
61
62 /* channel patch ctx buffer */
63 static int  gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
64                                         struct channel_gk20a *c);
65 static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);
66
67 /* golden ctx image */
68 static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
69                                           struct channel_gk20a *c);
70 static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
71                                           struct channel_gk20a *c);
72
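/*
 * Load the GPCCS and FECS ucode data segments into falcon DMEM through the
 * auto-incrementing DMEM access ports. A running checksum is accumulated
 * here but not consumed.
 */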
73 static void gr_gk20a_load_falcon_dmem(struct gk20a *g)
74 {
75         u32 i, ucode_u32_size;
76         const u32 *ucode_u32_data;
77         u32 checksum;
78
79         nvhost_dbg_fn("");
80
81         gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) |
82                                               gr_gpccs_dmemc_blk_f(0)  |
83                                               gr_gpccs_dmemc_aincw_f(1)));
84
85         ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.data.count;
86         ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.data.l;
87
88         for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
89                 gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]);
90                 checksum += ucode_u32_data[i];
91         }
92
93         gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) |
94                                              gr_fecs_dmemc_blk_f(0)  |
95                                              gr_fecs_dmemc_aincw_f(1)));
96
97         ucode_u32_size = g->gr.ctx_vars.ucode.fecs.data.count;
98         ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.data.l;
99
100         for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
101                 gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
102                 checksum += ucode_u32_data[i];
103         }
104         nvhost_dbg_fn("done");
105 }
106
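/*
 * Load the GPCCS and FECS ucode instruction segments into falcon IMEM.
 * IMEM is tagged in 256-byte blocks, so a new tag is written every
 * 256/sizeof(u32) words; the last partial block is zero-padded.
 */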
107 static void gr_gk20a_load_falcon_imem(struct gk20a *g)
108 {
109         u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size;
110         const u32 *ucode_u32_data;
111         u32 tag, i, pad_start, pad_end;
112         u32 checksum;
113
114         nvhost_dbg_fn("");
115
116         cfg = gk20a_readl(g, gr_fecs_cfg_r());
117         fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg);
118
119         cfg = gk20a_readl(g, gr_gpc0_cfg_r());
120         gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg);
121
122         /* Use the broadcast address to access all of the GPCCS units. */
123         gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) |
124                                               gr_gpccs_imemc_blk_f(0) |
125                                               gr_gpccs_imemc_aincw_f(1)));
126
127         /* Set up the tags for the instruction memory. */
128         tag = 0;
129         gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag));
130
131         ucode_u32_size = g->gr.ctx_vars.ucode.gpccs.inst.count;
132         ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.gpccs.inst.l;
133
134         for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
135                 if (i && ((i % (256/sizeof(u32))) == 0)) {
136                         tag++;
137                         gk20a_writel(g, gr_gpccs_imemt_r(0),
138                                       gr_gpccs_imemt_tag_f(tag));
139                 }
140                 gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]);
141                 checksum += ucode_u32_data[i];
142         }
143
144         pad_start = i * 4;
145         pad_end = pad_start + (256 - pad_start % 256) + 256;
146         for (i = pad_start;
147              (i < gpccs_imem_size * 256) && (i < pad_end);
148              i += 4) {
149                 if (i && ((i % 256) == 0)) {
150                         tag++;
151                         gk20a_writel(g, gr_gpccs_imemt_r(0),
152                                       gr_gpccs_imemt_tag_f(tag));
153                 }
154                 gk20a_writel(g, gr_gpccs_imemd_r(0), 0);
155         }
156
157         gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) |
158                                              gr_fecs_imemc_blk_f(0) |
159                                              gr_fecs_imemc_aincw_f(1)));
160
161         /* Set up the tags for the instruction memory. */
162         tag = 0;
163         gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag));
164
165         ucode_u32_size = g->gr.ctx_vars.ucode.fecs.inst.count;
166         ucode_u32_data = (const u32 *)g->gr.ctx_vars.ucode.fecs.inst.l;
167
168         for (i = 0, checksum = 0; i < ucode_u32_size; i++) {
169                 if (i && ((i % (256/sizeof(u32))) == 0)) {
170                         tag++;
171                         gk20a_writel(g, gr_fecs_imemt_r(0),
172                                       gr_fecs_imemt_tag_f(tag));
173                 }
174                 gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]);
175                 checksum += ucode_u32_data[i];
176         }
177
178         pad_start = i * 4;
179         pad_end = pad_start + (256 - pad_start % 256) + 256;
180         for (i = pad_start; (i < fecs_imem_size * 256) && i < pad_end; i += 4) {
181                 if (i && ((i % 256) == 0)) {
182                         tag++;
183                         gk20a_writel(g, gr_fecs_imemt_r(0),
184                                       gr_fecs_imemt_tag_f(tag));
185                 }
186                 gk20a_writel(g, gr_fecs_imemd_r(0), 0);
187         }
188 }
189
190 #define GR_IDLE_TIMEOUT_DEFAULT 10000   /* 10 milliseconds */
191
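/*
 * Poll until the graphics engine reports idle (or PGRAPH is disabled in
 * mc_enable), decrementing *timeout in GR_IDLE_CHECK_PERIOD steps.
 * Returns 0 on idle, -1 if the timeout expires.
 */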
192 static int gr_gk20a_wait_idle(struct gk20a *g, u32 *timeout)
193 {
194 #define GR_ENGINE_INDEX         0
195 #define GR_IDLE_CHECK_PERIOD    10              /* 10 usec */
196
197         u32 gr_engine_status;
198         u32 gr_status;
199         bool ctxsw_active = false;
200
201         nvhost_dbg_fn("");
202
203         do {
204                 u32 check = min_t(u32, GR_IDLE_CHECK_PERIOD, *timeout);
205
206                 /* fmodel: host gets fifo_engine_status(gr) from gr
207                    only when gr_status is read */
208                 gr_status = gk20a_readl(g, gr_status_r());
209
210                 gr_engine_status = gk20a_readl(g, gr_engine_status_r());
211
212                 if (!(gk20a_readl(g, mc_enable_r()) &
213                       mc_enable_pgraph_enabled_f()) ||
214                     (gr_engine_status_value_v(gr_engine_status) ==
215                      gr_engine_status_value_idle_v() &&
216                      !ctxsw_active)) {
217                         nvhost_dbg_fn("done");
218                         return 0;
219                 }
220
221                 udelay(GR_IDLE_CHECK_PERIOD);
222
223                 /* handle interrupts */
224
225                 *timeout -= check;
226
227         } while (*timeout);
228
229         nvhost_err(dev_from_gk20a(g), "timeout, status: 0x%08x",
230                    gr_engine_status);
231
232         return -1;
233 }
234
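/*
 * Pulse the FECS context-switch reset controls: assert the context resets
 * (or the caller-supplied mask), then release them, reading the register
 * back after each write so that more than 10 nvclks elapse.
 */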
235 static int gr_gk20a_ctx_reset(struct gk20a *g, u32 rst_mask)
236 {
237         nvhost_dbg_fn("");
238         /* FE_PWR_MODE_MODE_FORCE_ON for RTLSim and Emulation? */
239
240         if (rst_mask) {
241                 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(), rst_mask);
242         } else {
243                 gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
244                              gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
245                              gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
246                              gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f()  |
247                              gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
248                              gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
249                              gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f()  |
250                              gr_fecs_ctxsw_reset_ctl_sys_context_reset_enabled_f() |
251                              gr_fecs_ctxsw_reset_ctl_gpc_context_reset_enabled_f() |
252                              gr_fecs_ctxsw_reset_ctl_be_context_reset_enabled_f());
253         }
254
255         /* Delay for > 10 nvclks after writing reset. */
256         gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
257
258         gk20a_writel(g, gr_fecs_ctxsw_reset_ctl_r(),
259                      gr_fecs_ctxsw_reset_ctl_sys_halt_disabled_f() |
260                      gr_fecs_ctxsw_reset_ctl_gpc_halt_disabled_f() |
261                      gr_fecs_ctxsw_reset_ctl_be_halt_disabled_f()  |
262                      gr_fecs_ctxsw_reset_ctl_sys_engine_reset_disabled_f() |
263                      gr_fecs_ctxsw_reset_ctl_gpc_engine_reset_disabled_f() |
264                      gr_fecs_ctxsw_reset_ctl_be_engine_reset_disabled_f()  |
265                      gr_fecs_ctxsw_reset_ctl_sys_context_reset_disabled_f() |
266                      gr_fecs_ctxsw_reset_ctl_gpc_context_reset_disabled_f() |
267                      gr_fecs_ctxsw_reset_ctl_be_context_reset_disabled_f());
268
269         /* Delay for > 10 nvclks after writing reset. */
270         gk20a_readl(g, gr_fecs_ctxsw_reset_ctl_r());
271
272         /* FE_PWR_MODE_MODE_AUTO for RTLSim and Emulation? */
273
274         return 0;
275 }
276
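/*
 * Poll FECS ctxsw mailbox 'mailbox_id' until its value satisfies the
 * success condition (opc_success vs. mailbox_ok) or the failure condition
 * (opc_fail vs. mailbox_fail), or until GR_IDLE_TIMEOUT_DEFAULT expires.
 */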
277 static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
278                                    u32 *mailbox_ret, u32 opc_success,
279                                    u32 mailbox_ok, u32 opc_fail,
280                                    u32 mailbox_fail)
281 {
282         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
283         u32 check = WAIT_UCODE_LOOP;
284         u32 reg;
285
286         nvhost_dbg_fn("");
287
288         while (check == WAIT_UCODE_LOOP) {
289                 if (timeout == 0)
290                         check = WAIT_UCODE_TIMEOUT;
291
292                 /* XXX when this register read was sped up by removing printks
293                  * (in sim) we had to increase GR_IDLE_TIMEOUT_DEFAULT to avoid
294                  * spurious timeouts, which suggests the udelay below is not
295                  * behaving as we expect. */
296                 reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(mailbox_id));
297
298                 if (mailbox_ret)
299                         *mailbox_ret = reg;
300
301                 switch (opc_success) {
302                 case GR_IS_UCODE_OP_EQUAL:
303                         if (reg == mailbox_ok)
304                                 check = WAIT_UCODE_OK;
305                         break;
306                 case GR_IS_UCODE_OP_NOT_EQUAL:
307                         if (reg != mailbox_ok)
308                                 check = WAIT_UCODE_OK;
309                         break;
310                 case GR_IS_UCODE_OP_AND:
311                         if (reg & mailbox_ok)
312                                 check = WAIT_UCODE_OK;
313                         break;
314                 case GR_IS_UCODE_OP_LESSER:
315                         if (reg < mailbox_ok)
316                                 check = WAIT_UCODE_OK;
317                         break;
318                 case GR_IS_UCODE_OP_LESSER_EQUAL:
319                         if (reg <= mailbox_ok)
320                                 check = WAIT_UCODE_OK;
321                         break;
322                 case GR_IS_UCODE_OP_SKIP:
323                         /* no success check */
324                         break;
325                 default:
326                         nvhost_err(dev_from_gk20a(g),
327                                    "invalid success opcode 0x%x", opc_success);
328
329                         check = WAIT_UCODE_ERROR;
330                         break;
331                 }
332
333                 switch (opc_fail) {
334                 case GR_IS_UCODE_OP_EQUAL:
335                         if (reg == mailbox_fail)
336                                 check = WAIT_UCODE_ERROR;
337                         break;
338                 case GR_IS_UCODE_OP_NOT_EQUAL:
339                         if (reg != mailbox_fail)
340                                 check = WAIT_UCODE_ERROR;
341                         break;
342                 case GR_IS_UCODE_OP_AND:
343                         if (reg & mailbox_fail)
344                                 check = WAIT_UCODE_ERROR;
345                         break;
346                 case GR_IS_UCODE_OP_LESSER:
347                         if (reg < mailbox_fail)
348                                 check = WAIT_UCODE_ERROR;
349                         break;
350                 case GR_IS_UCODE_OP_LESSER_EQUAL:
351                         if (reg <= mailbox_fail)
352                                 check = WAIT_UCODE_ERROR;
353                         break;
354                 case GR_IS_UCODE_OP_SKIP:
355                         /* no check on failure */
356                         break;
357                 default:
358                         nvhost_err(dev_from_gk20a(g),
359                                    "invalid fail opcode 0x%x", opc_fail);
360                         check = WAIT_UCODE_ERROR;
361                         break;
362                 }
363
364                 udelay(10);
365                 timeout -= min_t(u32, GR_IDLE_CHECK_PERIOD, timeout);
366         }
367
368         if (check == WAIT_UCODE_TIMEOUT) {
369                 nvhost_err(dev_from_gk20a(g),
370                            "timeout waiting on ucode response");
371                 return -1;
372         } else if (check == WAIT_UCODE_ERROR) {
373                 nvhost_err(dev_from_gk20a(g),
374                            "ucode method failed on mailbox=%d value=0x%08x",
375                            mailbox_id, reg);
376                 return -1;
377         }
378
379         nvhost_dbg_fn("done");
380         return 0;
381 }
382
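/*
 * Push a method to the FECS falcon: optionally seed mailbox 'mb_id',
 * clear the requested bits of mailbox 0, write the method data and
 * address, then wait for the ucode to signal success or failure.
 */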
383 int gr_gk20a_submit_fecs_method(struct gk20a *g,
384                         u32 mb_id, u32 mb_data, u32 mb_clr,
385                         u32 mtd_data, u32 mtd_adr, u32 *mb_ret,
386                         u32 opc_ok, u32 mb_ok, u32 opc_fail, u32 mb_fail)
387 {
388         if (mb_id != 0)
389                 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(mb_id),
390                         mb_data);
391
392         gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
393                 gr_fecs_ctxsw_mailbox_clear_value_f(mb_clr));
394
395         gk20a_writel(g, gr_fecs_method_data_r(), mtd_data);
396         gk20a_writel(g, gr_fecs_method_push_r(),
397                 gr_fecs_method_push_adr_f(mtd_adr));
398
399         return gr_gk20a_ctx_wait_ucode(g, 0, mb_ret,
400                 opc_ok, mb_ok, opc_fail, mb_fail);
401 }
402
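/*
 * Write the graphics context pointer (gpu_va) into the channel's instance
 * block (the ram_in_gr_wfi_* fields).
 */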
403 static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
404 {
405         u32 addr_lo;
406         u32 addr_hi;
407         int ret = 0;
408         void *inst_ptr = NULL;
409
410         nvhost_dbg_fn("");
411
412         inst_ptr = mem_op().mmap(c->inst_block.mem.ref);
413         if (IS_ERR(inst_ptr)) {
414                 ret = -ENOMEM;
415                 goto clean_up;
416         }
417
418         addr_lo = u64_lo32(gpu_va) >> 12;
419         addr_hi = u64_hi32(gpu_va);
420
421         mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(),
422                  ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() |
423                  ram_in_gr_wfi_ptr_lo_f(addr_lo));
424
425         mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(),
426                  ram_in_gr_wfi_ptr_hi_f(addr_hi));
427
428         mem_op().munmap(c->inst_block.mem.ref, inst_ptr);
429
430         return 0;
431
432 clean_up:
433         if (inst_ptr)
434                 mem_op().munmap(c->inst_block.mem.ref, inst_ptr);
435
436         return ret;
437 }
438
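/*
 * Either write a register directly (patch == 0) or append an (addr, data)
 * pair to the channel's patch context, which is presumably applied later
 * by the context-switch ucode.
 */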
439 static int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_gk20a *c,
440                                     u32 addr, u32 data, u32 patch)
441 {
442         struct channel_ctx_gk20a *ch_ctx;
443         u32 patch_slot = 0;
444         void *patch_ptr = NULL;
445
446         nvhost_dbg_fn("");
447
448         BUG_ON(patch != 0 && c == NULL);
449
450         if (patch) {
451                 ch_ctx = &c->ch_ctx;
452                 patch_ptr = mem_op().mmap(ch_ctx->patch_ctx.mem.ref);
453                 if (IS_ERR(patch_ptr))
454                         return -ENOMEM;
455
456                 patch_slot = ch_ctx->patch_ctx.data_count * 2;
457
458                 mem_wr32(patch_ptr, patch_slot++, addr);
459                 mem_wr32(patch_ptr, patch_slot++, data);
460
461                 mem_op().munmap(ch_ctx->patch_ctx.mem.ref, patch_ptr);
462                 ch_ctx->patch_ctx.data_count++;
463         } else {
464                 gk20a_writel(g, addr, data);
465         }
466
467         return 0;
468 }
469
470 static int gr_gk20a_ctx_bind_first_channel(struct gk20a *g,
471                                         struct channel_gk20a *c)
472 {
473         u32 inst_base_ptr =
474                 u64_lo32(c->inst_block.cpu_pa) >> ram_in_base_shift_v();
475         int ret;
476
477         nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
478                    c->hw_chid, inst_base_ptr);
479
480         ret = gr_gk20a_submit_fecs_method(g, 0, 0, 0x30,
481                         gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
482                         gr_fecs_current_ctx_target_vid_mem_f() |
483                         gr_fecs_current_ctx_valid_f(1),
484                         gr_fecs_method_push_adr_bind_pointer_f(),
485                         0, GR_IS_UCODE_OP_AND, 0x10, GR_IS_UCODE_OP_AND, 0x20);
486         if (ret)
487                 nvhost_err(dev_from_gk20a(g),
488                         "bind channel instance failed");
489
490         return ret;
491 }
492
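/*
 * Program the zcull ctxsw mode and buffer pointer into the channel's gr
 * context image, optionally disabling gr engine activity around the update.
 */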
493 static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
494                                     bool disable_fifo)
495 {
496         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
497         struct fifo_gk20a *f = &g->fifo;
498         struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
499         u32 va_lo, va_hi, va;
500         int ret = 0;
501         void *ctx_ptr = NULL;
502
503         nvhost_dbg_fn("");
504
505         ctx_ptr = mem_op().mmap(ch_ctx->gr_ctx.mem.ref);
506         if (IS_ERR(ctx_ptr))
507                 return -ENOMEM;
508
509         if (ch_ctx->zcull_ctx.gpu_va == 0 &&
510             ch_ctx->zcull_ctx.ctx_sw_mode ==
511                 ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
512                 ret = -EINVAL;
513                 goto clean_up;
514         }
515
516         va_lo = u64_lo32(ch_ctx->zcull_ctx.gpu_va);
517         va_hi = u64_hi32(ch_ctx->zcull_ctx.gpu_va);
518         va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
519
520         if (disable_fifo) {
521                 ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
522                 if (ret) {
523                         nvhost_err(dev_from_gk20a(g),
524                                 "failed to disable gr engine activity\n");
525                         goto clean_up;
526                 }
527         }
528
529         mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_v(), 0,
530                  ch_ctx->zcull_ctx.ctx_sw_mode);
531
532         mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_v(), 0, va);
533
534         if (disable_fifo) {
535                 ret = gk20a_fifo_enable_engine_activity(g, gr_info);
536                 if (ret) {
537                         nvhost_err(dev_from_gk20a(g),
538                                 "failed to enable gr engine activity\n");
539                         goto clean_up;
540                 }
541         }
542
543 clean_up:
544         mem_op().munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);
545
546         return ret;
547 }
548
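/*
 * Program the perfmon (PM) ctxsw mode and buffer pointer into the channel's
 * gr context image; disabling engine activity around the update is still TBD.
 */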
549 static int gr_gk20a_ctx_pm_setup(struct gk20a *g, struct channel_gk20a *c,
550                                  bool disable_fifo)
551 {
552         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
553         u32 va_lo, va_hi, va;
554         int ret = 0;
555         void *ctx_ptr = NULL;
556
557         nvhost_dbg_fn("");
558
559         ctx_ptr = mem_op().mmap(ch_ctx->gr_ctx.mem.ref);
560         if (IS_ERR(ctx_ptr))
561                 return -ENOMEM;
562
563         if (ch_ctx->pm_ctx.ctx_sw_mode ==
564             ctxsw_prog_main_image_pm_mode_ctxsw_v()) {
565
566                 if (ch_ctx->pm_ctx.gpu_va == 0) {
567                         ret = -ENOMEM;
568                         goto clean_up;
569                 }
570
571                 va_lo = u64_lo32(ch_ctx->pm_ctx.gpu_va);
572                 va_hi = u64_hi32(ch_ctx->pm_ctx.gpu_va);
573                 va = ((va_lo >> 8) & 0x00FFFFFF) | ((va_hi << 24) & 0xFF000000);
574         } else {
575                 va_lo = va_hi = 0;
576                 va = 0;
577         }
578
579         /* TBD
580         if (disable_fifo)
581                 disable_engine_activity(...);
582         */
583
584         mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_v(), 0, ch_ctx->pm_ctx.ctx_sw_mode);
585         mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_v(), 0, va);
586
587         /* TBD
588         if (disable_fifo)
589                 enable_engine_activity(...);
590         */
591
592         nvhost_dbg_fn("done");
593
594 clean_up:
595         if (ret)
596                 nvhost_dbg_fn("fail");
597         mem_op().munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);
598         return ret;
599 }
600
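/*
 * Program the per-PPC beta/alpha circular buffer offsets and sizes for every
 * GPC, plus the constraint-logic and pd_ab_dist_cfg1 limits, either directly
 * or into the patch context depending on 'patch'.
 */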
601 static int gr_gk20a_commit_global_cb_manager(struct gk20a *g,
602                         struct channel_gk20a *c, u32 patch)
603 {
604         struct gr_gk20a *gr = &g->gr;
605         u32 attrib_offset_in_chunk = 0;
606         u32 alpha_offset_in_chunk = 0;
607         u32 pd_ab_max_output;
608         u32 gpc_index, ppc_index;
609         u32 temp;
610         u32 cbm_cfg_size1, cbm_cfg_size2;
611
612         nvhost_dbg_fn("");
613
614         gr_gk20a_ctx_patch_write(g, c, gr_ds_tga_constraintlogic_r(),
615                 gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
616                 gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
617                 patch);
618
619         pd_ab_max_output = (gr->alpha_cb_default_size *
620                 gr_gpc0_ppc0_cbm_cfg_size_granularity_v()) /
621                 gr_pd_ab_dist_cfg1_max_output_granularity_v();
622
623         gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg1_r(),
624                 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
625                 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
626
627         alpha_offset_in_chunk = attrib_offset_in_chunk +
628                 gr->tpc_count * gr->attrib_cb_size;
629
630         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
631                 temp = proj_gpc_stride_v() * gpc_index;
632                 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
633                      ppc_index++) {
634                         cbm_cfg_size1 = gr->attrib_cb_default_size *
635                                 gr->pes_tpc_count[ppc_index][gpc_index];
636                         cbm_cfg_size2 = gr->alpha_cb_default_size *
637                                 gr->pes_tpc_count[ppc_index][gpc_index];
638
639                         gr_gk20a_ctx_patch_write(g, c,
640                                 gr_gpc0_ppc0_cbm_cfg_r() + temp +
641                                 proj_ppc_in_gpc_stride_v() * ppc_index,
642                                 gr_gpc0_ppc0_cbm_cfg_timeslice_mode_f(gr->timeslice_mode) |
643                                 gr_gpc0_ppc0_cbm_cfg_start_offset_f(attrib_offset_in_chunk) |
644                                 gr_gpc0_ppc0_cbm_cfg_size_f(cbm_cfg_size1), patch);
645
646                         attrib_offset_in_chunk += gr->attrib_cb_size *
647                                 gr->pes_tpc_count[ppc_index][gpc_index];
648
649                         gr_gk20a_ctx_patch_write(g, c,
650                                 gr_gpc0_ppc0_cbm_cfg2_r() + temp +
651                                 proj_ppc_in_gpc_stride_v() * ppc_index,
652                                 gr_gpc0_ppc0_cbm_cfg2_start_offset_f(alpha_offset_in_chunk) |
653                                 gr_gpc0_ppc0_cbm_cfg2_size_f(cbm_cfg_size2), patch);
654
655                         alpha_offset_in_chunk += gr->alpha_cb_size *
656                                 gr->pes_tpc_count[ppc_index][gpc_index];
657                 }
658         }
659
660         return 0;
661 }
662
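/*
 * Commit the addresses of the global context buffers mapped into this
 * channel (pagepool, bundle cb, attrib cb) together with their size and
 * state-limit fields.
 */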
663 static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
664                         struct channel_gk20a *c, u32 patch)
665 {
666         struct gr_gk20a *gr = &g->gr;
667         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
668         u64 addr;
669         u32 size;
670         u32 data;
671
672         nvhost_dbg_fn("");
673
674         /* global pagepool */
675         addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
676                 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
677                 (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
678                  (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
679
680         size = gr->global_ctx_buffer[PAGEPOOL].size /
681                 gr_scc_pagepool_total_pages_byte_granularity_v();
682
683         if (size == gr_scc_pagepool_total_pages_hwmax_value_v())
684                 size = gr_scc_pagepool_total_pages_hwmax_v();
685
686         nvhost_dbg_info("pagepool addr : 0x%016llx, size : %d",
687                 addr, size);
688
689         gr_gk20a_ctx_patch_write(g, c, gr_scc_pagepool_base_r(),
690                 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
691
692         gr_gk20a_ctx_patch_write(g, c, gr_scc_pagepool_r(),
693                 gr_scc_pagepool_total_pages_f(size) |
694                 gr_scc_pagepool_valid_true_f(), patch);
695
696         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gcc_pagepool_base_r(),
697                 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
698
699         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gcc_pagepool_r(),
700                 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
701
702         gr_gk20a_ctx_patch_write(g, c, gr_pd_pagepool_r(),
703                 gr_pd_pagepool_total_pages_f(size) |
704                 gr_pd_pagepool_valid_true_f(), patch);
705
706         /* global bundle cb */
707         addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
708                 gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
709                 (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
710                  (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));
711
712         size = gr->bundle_cb_default_size;
713
714         nvhost_dbg_info("global bundle cb addr : 0x%016llx, size : %d",
715                 addr, size);
716
717         gr_gk20a_ctx_patch_write(g, c, gr_scc_bundle_cb_base_r(),
718                 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
719
720         gr_gk20a_ctx_patch_write(g, c, gr_scc_bundle_cb_size_r(),
721                 gr_scc_bundle_cb_size_div_256b_f(size) |
722                 gr_scc_bundle_cb_size_valid_true_f(), patch);
723
724         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_setup_bundle_cb_base_r(),
725                 gr_gpcs_setup_bundle_cb_base_addr_39_8_f(addr), patch);
726
727         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_setup_bundle_cb_size_r(),
728                 gr_gpcs_setup_bundle_cb_size_div_256b_f(size) |
729                 gr_gpcs_setup_bundle_cb_size_valid_true_f(), patch);
730
731         /* data for state_limit */
732         data = (gr->bundle_cb_default_size *
733                 gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
734                 gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
735
736         data = min_t(u32, data, gr->min_gpm_fifo_depth);
737
738         nvhost_dbg_info("global bundle cb token limit : %d, state limit : %d",
739                    gr->bundle_cb_token_limit, data);
740
741         gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg2_r(),
742                 gr_pd_ab_dist_cfg2_token_limit_f(gr->bundle_cb_token_limit) |
743                 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
744
745         /* global attrib cb */
746         addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
747                 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
748                 (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
749                  (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
750
751         nvhost_dbg_info("global attrib cb addr : 0x%016llx", addr);
752
753         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_setup_attrib_cb_base_r(),
754                 gr_gpcs_setup_attrib_cb_base_addr_39_12_f(addr) |
755                 gr_gpcs_setup_attrib_cb_base_valid_true_f(), patch);
756
757         gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_pe_pin_cb_global_base_addr_r(),
758                 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_v_f(addr) |
759                 gr_gpcs_tpcs_pe_pin_cb_global_base_addr_valid_true_f(), patch);
760
761         return 0;
762 }
763
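/*
 * Propagate the current timeslice mode into gpm_pd_cfg, pd_ab_dist_cfg0,
 * ds_debug and mpc_vtg_debug (and, when enabling, pe_vaf/pes_vsc_vpc),
 * either directly or via the patch context.
 */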
764 static int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c, u32 patch)
765 {
766         struct gr_gk20a *gr = &g->gr;
767         u32 gpm_pd_cfg;
768         u32 pd_ab_dist_cfg0;
769         u32 ds_debug;
770         u32 mpc_vtg_debug;
771         u32 pe_vaf;
772         u32 pe_vsc_vpc;
773
774         nvhost_dbg_fn("");
775
776         gpm_pd_cfg = gk20a_readl(g, gr_gpcs_gpm_pd_cfg_r());
777         pd_ab_dist_cfg0 = gk20a_readl(g, gr_pd_ab_dist_cfg0_r());
778         ds_debug = gk20a_readl(g, gr_ds_debug_r());
779         mpc_vtg_debug = gk20a_readl(g, gr_gpcs_tpcs_mpc_vtg_debug_r());
780
781         if (gr->timeslice_mode == gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v()) {
782                 pe_vaf = gk20a_readl(g, gr_gpcs_tpcs_pe_vaf_r());
783                 pe_vsc_vpc = gk20a_readl(g, gr_gpcs_tpcs_pes_vsc_vpc_r());
784
785                 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_enable_f() | gpm_pd_cfg;
786                 pe_vaf = gr_gpcs_tpcs_pe_vaf_fast_mode_switch_true_f() | pe_vaf;
787                 pe_vsc_vpc = gr_gpcs_tpcs_pes_vsc_vpc_fast_mode_switch_true_f() | pe_vsc_vpc;
788                 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_en_f() | pd_ab_dist_cfg0;
789                 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
790                 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
791
792                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
793                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, patch);
794                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, patch);
795                 gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
796                 gr_gk20a_ctx_patch_write(g, c, gr_ds_debug_r(), ds_debug, patch);
797                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
798         } else {
799                 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
800                 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
801                 ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
802                 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
803
804                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, patch);
805                 gr_gk20a_ctx_patch_write(g, c, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, patch);
806                 gr_gk20a_ctx_patch_write(g, c, gr_ds_debug_r(), ds_debug, patch);
807                 gr_gk20a_ctx_patch_write(g, c, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, patch);
808         }
809
810         return 0;
811 }
812
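/*
 * Program the screen-tile to GPC mapping tables (crstr, wwdx, rstr2d) from
 * gr->map_tiles and derive the wwdx normalization coefficients from the
 * TPC count.
 */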
813 static int gr_gk20a_setup_rop_mapping(struct gk20a *g,
814                                 struct gr_gk20a *gr)
815 {
816         u32 norm_entries, norm_shift;
817         u32 coeff5_mod, coeff6_mod, coeff7_mod, coeff8_mod, coeff9_mod, coeff10_mod, coeff11_mod;
818         u32 map0, map1, map2, map3, map4, map5;
819
820         if (!gr->map_tiles)
821                 return -1;
822
823         nvhost_dbg_fn("");
824
825         gk20a_writel(g, gr_crstr_map_table_cfg_r(),
826                      gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
827                      gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
828
829         map0 =  gr_crstr_gpc_map0_tile0_f(gr->map_tiles[0]) |
830                 gr_crstr_gpc_map0_tile1_f(gr->map_tiles[1]) |
831                 gr_crstr_gpc_map0_tile2_f(gr->map_tiles[2]) |
832                 gr_crstr_gpc_map0_tile3_f(gr->map_tiles[3]) |
833                 gr_crstr_gpc_map0_tile4_f(gr->map_tiles[4]) |
834                 gr_crstr_gpc_map0_tile5_f(gr->map_tiles[5]);
835
836         map1 =  gr_crstr_gpc_map1_tile6_f(gr->map_tiles[6]) |
837                 gr_crstr_gpc_map1_tile7_f(gr->map_tiles[7]) |
838                 gr_crstr_gpc_map1_tile8_f(gr->map_tiles[8]) |
839                 gr_crstr_gpc_map1_tile9_f(gr->map_tiles[9]) |
840                 gr_crstr_gpc_map1_tile10_f(gr->map_tiles[10]) |
841                 gr_crstr_gpc_map1_tile11_f(gr->map_tiles[11]);
842
843         map2 =  gr_crstr_gpc_map2_tile12_f(gr->map_tiles[12]) |
844                 gr_crstr_gpc_map2_tile13_f(gr->map_tiles[13]) |
845                 gr_crstr_gpc_map2_tile14_f(gr->map_tiles[14]) |
846                 gr_crstr_gpc_map2_tile15_f(gr->map_tiles[15]) |
847                 gr_crstr_gpc_map2_tile16_f(gr->map_tiles[16]) |
848                 gr_crstr_gpc_map2_tile17_f(gr->map_tiles[17]);
849
850         map3 =  gr_crstr_gpc_map3_tile18_f(gr->map_tiles[18]) |
851                 gr_crstr_gpc_map3_tile19_f(gr->map_tiles[19]) |
852                 gr_crstr_gpc_map3_tile20_f(gr->map_tiles[20]) |
853                 gr_crstr_gpc_map3_tile21_f(gr->map_tiles[21]) |
854                 gr_crstr_gpc_map3_tile22_f(gr->map_tiles[22]) |
855                 gr_crstr_gpc_map3_tile23_f(gr->map_tiles[23]);
856
857         map4 =  gr_crstr_gpc_map4_tile24_f(gr->map_tiles[24]) |
858                 gr_crstr_gpc_map4_tile25_f(gr->map_tiles[25]) |
859                 gr_crstr_gpc_map4_tile26_f(gr->map_tiles[26]) |
860                 gr_crstr_gpc_map4_tile27_f(gr->map_tiles[27]) |
861                 gr_crstr_gpc_map4_tile28_f(gr->map_tiles[28]) |
862                 gr_crstr_gpc_map4_tile29_f(gr->map_tiles[29]);
863
864         map5 =  gr_crstr_gpc_map5_tile30_f(gr->map_tiles[30]) |
865                 gr_crstr_gpc_map5_tile31_f(gr->map_tiles[31]) |
866                 gr_crstr_gpc_map5_tile32_f(0) |
867                 gr_crstr_gpc_map5_tile33_f(0) |
868                 gr_crstr_gpc_map5_tile34_f(0) |
869                 gr_crstr_gpc_map5_tile35_f(0);
870
871         gk20a_writel(g, gr_crstr_gpc_map0_r(), map0);
872         gk20a_writel(g, gr_crstr_gpc_map1_r(), map1);
873         gk20a_writel(g, gr_crstr_gpc_map2_r(), map2);
874         gk20a_writel(g, gr_crstr_gpc_map3_r(), map3);
875         gk20a_writel(g, gr_crstr_gpc_map4_r(), map4);
876         gk20a_writel(g, gr_crstr_gpc_map5_r(), map5);
877
878         switch (gr->tpc_count) {
879         case 1:
880                 norm_shift = 4;
881                 break;
882         case 2:
883         case 3:
884                 norm_shift = 3;
885                 break;
886         case 4:
887         case 5:
888         case 6:
889         case 7:
890                 norm_shift = 2;
891                 break;
892         case 8:
893         case 9:
894         case 10:
895         case 11:
896         case 12:
897         case 13:
898         case 14:
899         case 15:
900                 norm_shift = 1;
901                 break;
902         default:
903                 norm_shift = 0;
904                 break;
905         }
906
907         norm_entries = gr->tpc_count << norm_shift;
908         coeff5_mod = (1 << 5) % norm_entries;
909         coeff6_mod = (1 << 6) % norm_entries;
910         coeff7_mod = (1 << 7) % norm_entries;
911         coeff8_mod = (1 << 8) % norm_entries;
912         coeff9_mod = (1 << 9) % norm_entries;
913         coeff10_mod = (1 << 10) % norm_entries;
914         coeff11_mod = (1 << 11) % norm_entries;
915
916         gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg_r(),
917                      gr_ppcs_wwdx_map_table_cfg_row_offset_f(gr->map_row_offset) |
918                      gr_ppcs_wwdx_map_table_cfg_normalized_num_entries_f(norm_entries) |
919                      gr_ppcs_wwdx_map_table_cfg_normalized_shift_value_f(norm_shift) |
920                      gr_ppcs_wwdx_map_table_cfg_coeff5_mod_value_f(coeff5_mod) |
921                      gr_ppcs_wwdx_map_table_cfg_num_entries_f(gr->tpc_count));
922
923         gk20a_writel(g, gr_ppcs_wwdx_map_table_cfg2_r(),
924                      gr_ppcs_wwdx_map_table_cfg2_coeff6_mod_value_f(coeff6_mod) |
925                      gr_ppcs_wwdx_map_table_cfg2_coeff7_mod_value_f(coeff7_mod) |
926                      gr_ppcs_wwdx_map_table_cfg2_coeff8_mod_value_f(coeff8_mod) |
927                      gr_ppcs_wwdx_map_table_cfg2_coeff9_mod_value_f(coeff9_mod) |
928                      gr_ppcs_wwdx_map_table_cfg2_coeff10_mod_value_f(coeff10_mod) |
929                      gr_ppcs_wwdx_map_table_cfg2_coeff11_mod_value_f(coeff11_mod));
930
931         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map0_r(), map0);
932         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map1_r(), map1);
933         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map2_r(), map2);
934         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map3_r(), map3);
935         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map4_r(), map4);
936         gk20a_writel(g, gr_ppcs_wwdx_map_gpc_map5_r(), map5);
937
938         gk20a_writel(g, gr_rstr2d_map_table_cfg_r(),
939                      gr_rstr2d_map_table_cfg_row_offset_f(gr->map_row_offset) |
940                      gr_rstr2d_map_table_cfg_num_entries_f(gr->tpc_count));
941
942         gk20a_writel(g, gr_rstr2d_gpc_map0_r(), map0);
943         gk20a_writel(g, gr_rstr2d_gpc_map1_r(), map1);
944         gk20a_writel(g, gr_rstr2d_gpc_map2_r(), map2);
945         gk20a_writel(g, gr_rstr2d_gpc_map3_r(), map3);
946         gk20a_writel(g, gr_rstr2d_gpc_map4_r(), map4);
947         gk20a_writel(g, gr_rstr2d_gpc_map5_r(), map5);
948
949         return 0;
950 }
951
952 static inline u32 count_bits(u32 mask)
953 {
954         u32 temp = mask;
955         u32 count;
956         for (count = 0; temp != 0; count++)
957                 temp &= temp - 1;
958
959         return count;
960 }
961
962 static inline u32 clear_count_bits(u32 num, u32 clear_count)
963 {
964         u32 count = clear_count;
965         for (; (num != 0) && (count != 0); count--)
966                 num &= num - 1;
967
968         return num;
969 }
970
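/*
 * Build the PD alpha/beta ratio tables: for each of the 32 rows, split each
 * GPC's TPC mask into an alpha mask and a beta mask so the ratio tracks
 * row/rows, then write only the table registers that were populated.
 */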
971 static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g,
972                                         struct gr_gk20a *gr)
973 {
974         u32 table_index_bits = 5;
975         u32 rows = (1 << table_index_bits);
976         u32 row_stride = gr_pd_alpha_ratio_table__size_1_v() / rows;
977
978         u32 row;
979         u32 index;
980         u32 gpc_index;
981         u32 gpcs_per_reg = 4;
982         u32 pes_index;
983         u32 tpc_count_pes;
984         u32 num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
985
986         u32 alpha_target, beta_target;
987         u32 alpha_bits, beta_bits;
988         u32 alpha_mask, beta_mask, partial_mask;
989         u32 reg_offset;
990         bool assign_alpha;
991
992         u32 map_alpha[gr_pd_alpha_ratio_table__size_1_v()];
993         u32 map_beta[gr_pd_alpha_ratio_table__size_1_v()];
994         u32 map_reg_used[gr_pd_alpha_ratio_table__size_1_v()];
995
996         nvhost_dbg_fn("");
997
998         memset(map_alpha, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
999         memset(map_beta, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
1000         memset(map_reg_used, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32));
1001
1002         for (row = 0; row < rows; ++row) {
1003                 alpha_target = max_t(u32, gr->tpc_count * row / rows, 1);
1004                 beta_target = gr->tpc_count - alpha_target;
1005
1006                 assign_alpha = (alpha_target < beta_target);
1007
1008                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
1009                         reg_offset = (row * row_stride) + (gpc_index / gpcs_per_reg);
1010                         alpha_mask = beta_mask = 0;
1011
1012                         for (pes_index = 0; pes_index < num_pes_per_gpc; pes_index++) {
1013                                 tpc_count_pes = gr->pes_tpc_count[pes_index][gpc_index];
1014
1015                                 if (assign_alpha) {
1016                                         alpha_bits = (alpha_target == 0) ? 0 : tpc_count_pes;
1017                                         beta_bits = tpc_count_pes - alpha_bits;
1018                                 } else {
1019                                         beta_bits = (beta_target == 0) ? 0 : tpc_count_pes;
1020                                         alpha_bits = tpc_count_pes - beta_bits;
1021                                 }
1022
1023                                 partial_mask = gr->pes_tpc_mask[pes_index][gpc_index];
1024                                 partial_mask = clear_count_bits(partial_mask, tpc_count_pes - alpha_bits);
1025                                 alpha_mask |= partial_mask;
1026
1027                                 partial_mask = gr->pes_tpc_mask[pes_index][gpc_index] ^ partial_mask;
1028                                 beta_mask |= partial_mask;
1029
1030                                 alpha_target -= min(alpha_bits, alpha_target);
1031                                 beta_target -= min(beta_bits, beta_target);
1032
1033                                 if ((alpha_bits > 0) || (beta_bits > 0))
1034                                         assign_alpha = !assign_alpha;
1035                         }
1036
1037                         switch (gpc_index % gpcs_per_reg) {
1038                         case 0:
1039                                 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n0_mask_f(alpha_mask);
1040                                 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n0_mask_f(beta_mask);
1041                                 break;
1042                         case 1:
1043                                 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n1_mask_f(alpha_mask);
1044                                 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n1_mask_f(beta_mask);
1045                                 break;
1046                         case 2:
1047                                 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n2_mask_f(alpha_mask);
1048                                 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n2_mask_f(beta_mask);
1049                                 break;
1050                         case 3:
1051                                 map_alpha[reg_offset] |= gr_pd_alpha_ratio_table_gpc_4n3_mask_f(alpha_mask);
1052                                 map_beta[reg_offset] |= gr_pd_beta_ratio_table_gpc_4n3_mask_f(beta_mask);
1053                                 break;
1054                         }
1055                         map_reg_used[reg_offset] = true;
1056                 }
1057         }
1058
1059         for (index = 0; index < gr_pd_alpha_ratio_table__size_1_v(); index++) {
1060                 if (map_reg_used[index]) {
1061                         gk20a_writel(g, gr_pd_alpha_ratio_table_r(index), map_alpha[index]);
1062                         gk20a_writel(g, gr_pd_beta_ratio_table_r(index), map_beta[index]);
1063                 }
1064         }
1065
1066         return 0;
1067 }
1068
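/*
 * Apply the floorswept GPC/TPC configuration: assign SM ids, program the
 * per-GPC active TPC counts, the TPC-per-GPC and dist-skip tables, the
 * ROP and alpha/beta mappings, and the active FBP counts.
 */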
1069 static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
1070 {
1071         struct gr_gk20a *gr = &g->gr;
1072         u32 tpc_index, gpc_index;
1073         u32 tpc_offset, gpc_offset;
1074         u32 sm_id = 0, gpc_id = 0;
1075         u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
1076         u32 tpc_per_gpc;
1077         u32 max_ways_evict = INVALID_MAX_WAYS;
1078
1079         nvhost_dbg_fn("");
1080
1081         for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) {
1082                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
1083                         gpc_offset = proj_gpc_stride_v() * gpc_index;
1084                         if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
1085                                 tpc_offset = proj_tpc_in_gpc_stride_v() * tpc_index;
1086
1087                                 gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
1088                                              gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
1089                                 gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset,
1090                                              gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id));
1091                                 gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset,
1092                                              gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
1093                                 gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
1094                                              gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
1095
1096                                 sm_id_to_gpc_id[sm_id] = gpc_index;
1097                                 sm_id++;
1098                         }
1099
1100                         gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset,
1101                                      gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
1102                         gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset,
1103                                      gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
1104                 }
1105         }
1106
1107         for (tpc_index = 0, gpc_id = 0;
1108              tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
1109              tpc_index++, gpc_id += 8) {
1110
1111                 if (gpc_id >= gr->gpc_count)
1112                         gpc_id = 0;
1113
1114                 tpc_per_gpc =
1115                         gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) |
1116                         gr_pd_num_tpc_per_gpc_count1_f(gr->gpc_tpc_count[gpc_id + 1]) |
1117                         gr_pd_num_tpc_per_gpc_count2_f(gr->gpc_tpc_count[gpc_id + 2]) |
1118                         gr_pd_num_tpc_per_gpc_count3_f(gr->gpc_tpc_count[gpc_id + 3]) |
1119                         gr_pd_num_tpc_per_gpc_count4_f(gr->gpc_tpc_count[gpc_id + 4]) |
1120                         gr_pd_num_tpc_per_gpc_count5_f(gr->gpc_tpc_count[gpc_id + 5]) |
1121                         gr_pd_num_tpc_per_gpc_count6_f(gr->gpc_tpc_count[gpc_id + 6]) |
1122                         gr_pd_num_tpc_per_gpc_count7_f(gr->gpc_tpc_count[gpc_id + 7]);
1123
1124                 gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
1125                 gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(tpc_index), tpc_per_gpc);
1126         }
1127
1128         /* grSetupPDMapping stubbed for gk20a */
1129         gr_gk20a_setup_rop_mapping(g, gr);
1130         gr_gk20a_setup_alpha_beta_tables(g, gr);
1131
1132         if (gr->num_fbps == 1)
1133                 max_ways_evict = 9;
1134
1135         if (max_ways_evict != INVALID_MAX_WAYS)
1136                 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_r(),
1137                              ((gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_r()) &
1138                                ~(ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(~0))) |
1139                               ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(max_ways_evict)));
1140
1141         for (gpc_index = 0;
1142              gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
1143              gpc_index += 4) {
1144
1145                 gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
1146                              gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) |
1147                              gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) |
1148                              gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) |
1149                              gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
1150         }
1151
1152         gk20a_writel(g, gr_cwd_fs_r(),
1153                      gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
1154                      gr_cwd_fs_num_tpcs_f(gr->tpc_count));
1155
1156         gk20a_writel(g, gr_bes_zrop_settings_r(),
1157                      gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps));
1158         gk20a_writel(g, gr_bes_crop_settings_r(),
1159                      gr_bes_crop_settings_num_active_fbps_f(gr->num_fbps));
1160
1161         return 0;
1162 }
1163
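/*
 * Ask the FECS ucode to save the current context image for this channel;
 * save_type selects the save method (e.g. wfi_golden_save).
 */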
1164 static int gr_gk20a_force_image_save(struct channel_gk20a *c, u32 save_type)
1165 {
1166         struct gk20a *g = c->g;
1167         int ret;
1168
1169         u32 inst_base_ptr =
1170                 u64_lo32(c->inst_block.cpu_pa) >> ram_in_base_shift_v();
1171
1172         nvhost_dbg_fn("");
1173
1174         ret = gr_gk20a_submit_fecs_method(g, 0, 0, 3,
1175                         gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
1176                         gr_fecs_current_ctx_target_vid_mem_f() |
1177                         gr_fecs_current_ctx_valid_f(1), save_type, 0,
1178                         GR_IS_UCODE_OP_AND, 1, GR_IS_UCODE_OP_AND, 2);
1179         if (ret)
1180                 nvhost_err(dev_from_gk20a(g), "save context image failed");
1181
1182         return ret;
1183 }
1184
1185 /* init global golden image from a fresh gr_ctx in channel ctx.
1186    save a copy in local_golden_image in ctx_vars */
1187 static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1188                                           struct channel_gk20a *c)
1189 {
1190         struct gr_gk20a *gr = &g->gr;
1191         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1192         u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
1193         u32 ctx_header_words;
1194         u32 i;
1195         u32 data;
1196         void *ctx_ptr = NULL;
1197         void *gold_ptr = NULL;
1198         int err = 0;
1199
1200         nvhost_dbg_fn("");
1201
1202         err = gr_gk20a_ctx_bind_first_channel(g, c);
1203         if (err)
1204                 goto clean_up;
1205
1206         err = gr_gk20a_commit_global_ctx_buffers(g, c, 0);
1207         if (err)
1208                 goto clean_up;
1209
1210         gold_ptr = mem_op().mmap(gr->global_ctx_buffer[GOLDEN_CTX].ref);
1211         if (IS_ERR(gold_ptr))
1212                 goto clean_up;
1213
1214         ctx_ptr = mem_op().mmap(ch_ctx->gr_ctx.mem.ref);
1215         if (IS_ERR(ctx_ptr))
1216                 goto clean_up;
1217
1218         ctx_header_words =  roundup(ctx_header_bytes, sizeof(u32));
1219         ctx_header_words >>= 2;
1220
1221         for (i = 0; i < ctx_header_words; i++) {
1222                 data = mem_rd32(ctx_ptr, i);
1223                 mem_wr32(gold_ptr, i, data);
1224         }
1225
1226         mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_v(), 0,
1227                  ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
1228
1229         mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_v(), 0, 0);
1230
1231         gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1232
1233         gr_gk20a_force_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_f());
1234
1235         if (gr->ctx_vars.local_golden_image == NULL) {
1236
1237                 gr->ctx_vars.local_golden_image =
1238                         kzalloc(gr->ctx_vars.golden_image_size, GFP_KERNEL);
1239
1240                 if (gr->ctx_vars.local_golden_image == NULL) {
1241                         err = -ENOMEM;
1242                         goto clean_up;
1243                 }
1244
1245                 for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
1246                         gr->ctx_vars.local_golden_image[i] =
1247                                 mem_rd32(gold_ptr, i);
1248         }
1249
1250         gr->ctx_vars.golden_image_initialized = true;
1251
1252         /* TBD: determine if this is necessary
1253            Bug 1035430 */
1254         gk20a_writel(g, gr_fecs_current_ctx_r(),
1255                 gr_fecs_current_ctx_valid_false_f());
1256
1257 clean_up:
1258         if (err)
1259                 nvhost_dbg(dbg_fn | dbg_err, "fail");
1260         else
1261                 nvhost_dbg_fn("done");
1262
1263         if (gold_ptr)
1264                 mem_op().munmap(gr->global_ctx_buffer[GOLDEN_CTX].ref,
1265                                 gold_ptr);
1266         if (ctx_ptr)
1267                 mem_op().munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);
1268
1269         return err;
1270 }
1271
1272 /* load a saved fresh copy of the golden image into the channel gr_ctx */
1273 static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1274                                         struct channel_gk20a *c)
1275 {
1276         struct gr_gk20a *gr = &g->gr;
1277         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1278         u32 virt_addr_lo;
1279         u32 virt_addr_hi;
1280         u32 i;
1281         int ret = 0;
1282         void *ctx_ptr = NULL;
1283
1284         nvhost_dbg_fn("");
1285
1286         if (gr->ctx_vars.local_golden_image == NULL)
1287                 return -1;
1288
1289         ctx_ptr = mem_op().mmap(ch_ctx->gr_ctx.mem.ref);
1290         if (IS_ERR(ctx_ptr))
1291                 return -ENOMEM;
1292
1293         for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++)
1294                 mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]);
1295
1296         mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_v(), 0, 0);
1297         mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_v(), 0, 0);
1298
1299         virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.gpu_va);
1300         virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.gpu_va);
1301
1302         mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_v(), 0,
1303                  ch_ctx->patch_ctx.data_count);
1304         mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_v(), 0,
1305                  virt_addr_lo);
1306         mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_v(), 0,
1307                  virt_addr_hi);
1308
1309         mem_op().munmap(ch_ctx->gr_ctx.mem.ref, ctx_ptr);
1310
1311         /* gr_gk20a_ctx_zcull_setup(g, c, false); */
1312         gr_gk20a_ctx_pm_setup(g, c, false);
1313
1314         if (tegra_revision == TEGRA_REVISION_SIM) {
1315                 u32 inst_base_ptr =
1316                         u64_lo32(c->inst_block.cpu_pa) >> ram_in_base_shift_v();
1317
1318                 ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0,
1319                                 gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
1320                                 gr_fecs_current_ctx_target_vid_mem_f() |
1321                                 gr_fecs_current_ctx_valid_f(1),
1322                                 gr_fecs_method_push_adr_restore_golden_f(), 0,
1323                                 GR_IS_UCODE_OP_EQUAL, gr_fecs_ctxsw_mailbox_value_pass_v(),
1324                                 GR_IS_UCODE_OP_SKIP, 0);
1325                 if (ret)
1326                         nvhost_err(dev_from_gk20a(g),
1327                                    "restore context image failed");
1328         }
1329
1330         return ret;
1331 }
1332
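/*
 * Kick off the ctxsw falcons: clear the FECS mailbox, drop the
 * require-ctx condition on both DMA controllers, then start the
 * GPCCS and FECS CPUs.
 */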
1333 static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
1334 {
1335         nvhost_dbg_fn("");
1336
1337         gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0),
1338                      gr_fecs_ctxsw_mailbox_clear_value_f(~0));
1339
1340         gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0));
1341         gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0));
1342
1343         gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1));
1344         gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1));
1345
1346         nvhost_dbg_fn("done");
1347 }
1348
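/*
 * Load the context-switch ucode into FECS/GPCCS DMEM and IMEM, start
 * both falcons and wait for the init handshake, then set the ucode
 * watchdog timeout.  On simulation, a marker value (0xc0de7777) is
 * written to mailbox 7 of each falcon first.
 */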
1349 static int gr_gk20a_load_ctxsw_ucode(struct gk20a *g, struct gr_gk20a *gr)
1350 {
1351         u32 ret;
1352
1353         nvhost_dbg_fn("");
1354
1355         if (tegra_revision == TEGRA_REVISION_SIM) {
1356                 gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
1357                         gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
1358                 gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
1359                         gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
1360         }
1361
1362         gr_gk20a_load_falcon_dmem(g);
1363         gr_gk20a_load_falcon_imem(g);
1364
1365         gr_gk20a_start_falcon_ucode(g);
1366
1367         ret = gr_gk20a_ctx_wait_ucode(g, 0, 0,
1368                                       GR_IS_UCODE_OP_EQUAL,
1369                                       eUcodeHandshakeInitComplete,
1370                                       GR_IS_UCODE_OP_SKIP, 0);
1371         if (ret) {
1372                 nvhost_err(dev_from_gk20a(g), "falcon ucode init timeout");
1373                 return ret;
1374         }
1375
1376         gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
1377         gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
1378         gk20a_writel(g, gr_fecs_method_push_r(),
1379                      gr_fecs_method_push_adr_set_watchdog_timeout_f());
1380
1381         nvhost_dbg_fn("done");
1382         return 0;
1383 }
1384
1385 #define PRI_GPCCS_ADDR_WIDTH 15
1386 #define CTXSW_UCODE_HEADER_SIZE_IN_BYTES 256
1387
1388 #define PRI_GPCCS_ADDR_MASK(addr)       ((addr) & ((1 << PRI_GPCCS_ADDR_WIDTH) - 1))
1389 #define PRI_GPC_ADDR(addr, gpc)         (proj_gpc_base_v()+((gpc)*proj_gpc_stride_v())+(addr))
1390
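/*
 * Build the ctxsw image header: a 256-byte main header, a 256-byte FECS
 * local header and one 256-byte local header per GPC.  Each local header
 * records its priv-reg segment offset/size (in 256-byte units, hence the
 * >> 8 shifts), ramchain layout and a magic value; the main header gets
 * the GPC count and total image size.
 */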
1391 static int gr_gk20a_create_ctx_header(struct gk20a *g, u32 *header)
1392 {
1393         u32 *header_curr;
1394         u32 num_gpcs;
1395         u32 num_tpcs;
1396         u32 num_ppcs;
1397         u32 tpc_id_mask;
1398         u32 ppc_mask;
1399         u32 rc_offset, rc_size;
1400         u32 num_fecs_ramchains;
1401         u32 num_gpc_ramchains;
1402         u32 sys_priv_size;
1403         u32 sys_priv_offset;
1404         u32 gpc_priv_size;
1405         u32 gpc_priv_offset;
1406         u32 fecs_image_size;
1407         u32 gpc_image_size;
1408         u32 total_image_size;
1409         u32 lane, gpc, ppc;
1410         u32 addr, words, bytes;
1411         u32 litter_num_pes_per_gpc;
1412
1413         if (!g->gr.ctx_vars.valid)
1414                 return -1;
1415
1416         nvhost_dbg_fn("");
1417
1418         if (tegra_revision == TEGRA_REVISION_SIM) {
1419                 num_gpcs = g->gr.gpc_count;
1420         } else {
1421                 num_gpcs = gk20a_readl(g, gr_fecs_fs_r());
1422                 num_gpcs = gr_fecs_fs_num_available_gpcs_v(num_gpcs);
1423         }
1424
1425         header_curr = header;
1426
1427         header_curr[ctxsw_prog_main_image_num_gpc_v() >> 2] = num_gpcs;
1428         header_curr[ctxsw_prog_main_image_magic_value_v() >> 2] =
1429                 ctxsw_prog_main_image_magic_value_v_value_f();
1430
1431         fecs_image_size = g->gr.ctx_vars.ctxsw_regs.sys.count << 2;
1432         fecs_image_size = ((fecs_image_size + 255) & ~255);
1433
1434         sys_priv_size = fecs_image_size >> 8;
1435         sys_priv_offset = 2 + num_gpcs;
1436
1437         header_curr += (CTXSW_UCODE_HEADER_SIZE_IN_BYTES >> 2);
1438         header_curr[ctxsw_prog_local_reg_ctl_v() >> 2] =
1439                 ctxsw_prog_local_reg_ctl_offset_f(sys_priv_offset) |
1440                 ctxsw_prog_local_reg_ctl_size_f(sys_priv_size);
1441         header_curr[ctxsw_prog_local_magic_value_v() >> 2] =
1442                 ctxsw_prog_local_magic_value_v_value_f();
1443
1444         if (tegra_revision != TEGRA_REVISION_SIM) {
1445                 rc_offset = 0;
1446                 rc_size = 0;
1447
1448                 num_fecs_ramchains = gr_fecs_rc_lanes_num_chains_v(
1449                         gk20a_readl(g, gr_fecs_rc_lanes_r()));
1450
1451                 header_curr[ctxsw_prog_local_image_ctl_v() >> 2] =
1452                         ctxsw_prog_local_image_ctl_num_ramchains_f(num_fecs_ramchains);
1453
1454                 for (lane = 0; lane < num_fecs_ramchains; lane++) {
1455                         rc_offset += (rc_size >> 8);
1456
1457                         gk20a_writel(g, gr_fecs_falcon_addr_r(), lane);
1458                         words = gr_fecs_rc_lane_size_v_v(
1459                                         gk20a_readl(g, gr_fecs_rc_lane_size_r(0)));
1460                         header_curr[ctxsw_prog_local_ramchain_save_v(lane) >> 2] =
1461                                 ctxsw_prog_local_ramchain_save_words_f(words);
1462                         bytes = words << 2;
1463
1464                         if (bytes)
1465                                 header_curr[ctxsw_prog_local_ramchain_ctl_v(lane) >> 2] =
1466                                         ctxsw_prog_local_ramchain_ctl_offset_f(rc_offset);
1467                         else
1468                                 header_curr[ctxsw_prog_local_ramchain_ctl_v(lane) >> 2] =
1469                                         ctxsw_prog_local_ramchain_ctl_offset_f(0);
1470
1471                         rc_size = (bytes + 0xFF) & ~0xFF;
1472                         fecs_image_size += rc_size;
1473                 }
1474         }
1475
1476         header_curr[ctxsw_prog_local_image_size_v() >> 2] = fecs_image_size;
1477         total_image_size = fecs_image_size + 256 + 256 + num_gpcs * 256;
1478
1479         litter_num_pes_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
1480         for (gpc = 0; gpc < num_gpcs; gpc++) {
1481
1482                 header_curr += (CTXSW_UCODE_HEADER_SIZE_IN_BYTES >> 2);
1483
1484                 addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(gr_gpc0_fs_gpc_r()), gpc);
1485                 num_tpcs = gr_gpc0_fs_gpc_num_available_tpcs_v(
1486                                 gk20a_readl(g, addr));
1487
1488                 if (litter_num_pes_per_gpc > 1) {
1489                         num_ppcs = 0;
1490                         ppc_mask = 0;
1491                         for (ppc = 0; ppc < litter_num_pes_per_gpc; ppc++) {
1492                                 addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(
1493                                         gr_gpc0_gpm_pd_pes_tpc_id_mask_r(ppc)), gpc);
1494                                 tpc_id_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(
1495                                         gk20a_readl(g, addr));
1496                                 if (tpc_id_mask) {
1497                                         num_ppcs++;
1498                                         ppc_mask |= (1 << ppc);
1499                                 }
1500                         }
1501                         header_curr[ctxsw_prog_local_image_ppc_info_v() >> 2] =
1502                                 ctxsw_prog_local_image_ppc_info_ppc_mask_f(ppc_mask) |
1503                                 ctxsw_prog_local_image_ppc_info_num_ppcs_f(num_ppcs);
1504                 }
1505
1506                 gpc_priv_offset = total_image_size >> 8;
1507                 gpc_image_size = (g->gr.ctx_vars.ctxsw_regs.gpc.count +
1508                                   g->gr.ctx_vars.ctxsw_regs.tpc.count * num_tpcs) << 2;
1509                 gpc_image_size = ((gpc_image_size + 0xFF) & ~0xFF);
1510                 gpc_priv_size = gpc_image_size >> 8;
1511
1512                 header_curr[ctxsw_prog_local_reg_ctl_v() >> 2] =
1513                         ctxsw_prog_local_reg_ctl_offset_f(gpc_priv_offset) |
1514                         ctxsw_prog_local_reg_ctl_size_f(gpc_priv_size);
1515
1516                 header_curr[ctxsw_prog_local_image_num_tpcs_v() >> 2] =
1517                         num_tpcs;
1518                 header_curr[ctxsw_prog_local_magic_value_v() >> 2] =
1519                         ctxsw_prog_local_magic_value_v_value_f();
1520
1521                 if (tegra_revision != TEGRA_REVISION_SIM) {
1522                         rc_offset = 0;
1523                         rc_size = 0;
1524
1525                         addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(
1526                                 gr_gpccs_rc_lanes_r()), gpc);
1527                         num_gpc_ramchains = gr_gpccs_rc_lanes_num_chains_v(
1528                                 gk20a_readl(g, addr));
1529
1530                         header_curr[ctxsw_prog_local_image_ctl_v() >> 2] =
1531                                 ctxsw_prog_local_image_ctl_num_ramchains_f(num_gpc_ramchains);
1532
1533                         for (lane = 0; lane < num_gpc_ramchains; lane++) {
1534                                 rc_offset += rc_size >> 8;
1535
1536                                 addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(
1537                                                 gr_gpccs_falcon_addr_r()), gpc);
1538                                 gk20a_writel(g, addr, lane);
1539
1540                                 addr = PRI_GPC_ADDR(PRI_GPCCS_ADDR_MASK(
1541                                                 gr_gpccs_rc_lane_size_r(0)), gpc);
1542                                 words = gr_gpccs_rc_lane_size_v_v(
1543                                                 gk20a_readl(g, addr));
1544
1545                                 header_curr[ctxsw_prog_local_ramchain_save_v(lane) >> 2] =
1546                                         ctxsw_prog_local_ramchain_save_words_f(words);
1547                                 bytes = words << 2;
1548
1549                                 if (bytes)
1550                                         header_curr[ctxsw_prog_local_ramchain_ctl_v(lane) >> 2] =
1551                                                 ctxsw_prog_local_ramchain_ctl_offset_f(rc_offset);
1552                                 else
1553                                         header_curr[ctxsw_prog_local_ramchain_ctl_v(lane) >> 2] =
1554                                                 ctxsw_prog_local_ramchain_ctl_offset_f(0);
1555
1556                                 rc_size = (bytes + 0xFF) & ~0xFF;
1557                                 gpc_image_size += rc_size;
1558                         }
1559                 }
1560
1561                 header_curr[ctxsw_prog_local_image_size_v() >> 2] = gpc_image_size;
1562                 total_image_size += gpc_image_size;
1563         }
1564
1565         header[ctxsw_prog_main_image_size_v() >> 2] = total_image_size;
1566
1567         return 0;
1568 }
1569
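/*
 * Query the golden, zcull and perfmon context image sizes from FECS
 * (skipped once golden_image_size is already known) and build a
 * temporary context header used for ctx overrides.
 */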
1570 static int gr_gk20a_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
1571 {
1572         u32 golden_ctx_image_size = 0;
1573         u32 zcull_ctx_image_size = 0;
1574         u32 pm_ctx_image_size = 0;
1575         u32 ret;
1576
1577         nvhost_dbg_fn("");
1578
1579         if (g->gr.ctx_vars.golden_image_size)
1580                 return 0;
1581
1582         /* 256 bytes hdr + 256 bytes FECS + numGpc * 256 bytes GPCCS */
1583         gr->ctx_vars.buffer_header_size = 256 + 256 + 256 * gr->gpc_count;
1584
1585         ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 0,
1586                         gr_fecs_method_push_adr_discover_image_size_f(),
1587                         &golden_ctx_image_size,
1588                         GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
1589         if (ret) {
1590                 nvhost_err(dev_from_gk20a(g),
1591                            "query golden image size failed");
1592                 return ret;
1593         }
1594
1595         ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 0,
1596                         gr_fecs_method_push_adr_discover_zcull_image_size_f(),
1597                         &zcull_ctx_image_size,
1598                         GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
1599         if (ret) {
1600                 nvhost_err(dev_from_gk20a(g),
1601                            "query zcull ctx image size failed");
1602                 return ret;
1603         }
1604
1605         ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 0,
1606                         gr_fecs_method_push_adr_discover_pm_image_size_f(),
1607                         &pm_ctx_image_size,
1608                         GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
1609         if (ret) {
1610                 nvhost_err(dev_from_gk20a(g),
1611                            "query pm ctx image size failed");
1612                 return ret;
1613         }
1614
1615         g->gr.ctx_vars.golden_image_size = golden_ctx_image_size;
1616         g->gr.ctx_vars.zcull_ctxsw_image_size = zcull_ctx_image_size;
1617
1618         /* create a temp header for ctx override */
1619         if (!gr->temp_ctx_header) {
1620                 gr->temp_ctx_header =
1621                         kzalloc(gr->ctx_vars.buffer_header_size, GFP_KERNEL);
1622                 if (!gr->temp_ctx_header)
1623                         return -ENOMEM;
1624         }
1625
1626         gr_gk20a_create_ctx_header(g, (u32 *)gr->temp_ctx_header);
1627
1628         nvhost_dbg_fn("done");
1629         return 0;
1630 }
1631
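/*
 * Allocate the global context buffers shared across channels: the bundle
 * circular buffer, the pagepool, the attribute buffer (plus a copy meant
 * for VPR use) and the golden context image store.  Attribute/alpha CB
 * sizes are clamped to the per-TPC maxima before sizing the buffer.
 */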
1632 static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
1633 {
1634         struct gr_gk20a *gr = &g->gr;
1635         struct mem_mgr *memmgr = mem_mgr_from_g(g);
1636         struct mem_handle *mem;
1637         u32 i, attr_buffer_size;
1638
1639         u32 cb_buffer_size = gr_scc_bundle_cb_size_div_256b__prod_v() *
1640                 gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
1641
1642         u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
1643                 gr_scc_pagepool_total_pages_byte_granularity_v();
1644
1645         u32 attr_cb_default_size = gr_gpc0_ppc0_cbm_cfg_size_default_v();
1646         u32 alpha_cb_default_size = gr_gpc0_ppc0_cbm_cfg2_size_default_v();
1647
1648         u32 attr_cb_size =
1649                 attr_cb_default_size + (attr_cb_default_size >> 1);
1650         u32 alpha_cb_size =
1651                 alpha_cb_default_size + (alpha_cb_default_size >> 1);
1652
1653         u32 num_tpcs_per_pes = proj_scal_litter_num_tpcs_per_pes_v();
1654         u32 attr_max_size_per_tpc =
1655                 gr_gpc0_ppc0_cbm_cfg_size_v(~0) / num_tpcs_per_pes;
1656         u32 alpha_max_size_per_tpc =
1657                 gr_gpc0_ppc0_cbm_cfg2_size_v(~0) / num_tpcs_per_pes;
1658
1659
1660         nvhost_dbg_fn("");
1661
1662         attr_cb_size =
1663                 (attr_cb_size > attr_max_size_per_tpc) ?
1664                         attr_max_size_per_tpc : attr_cb_size;
1665         attr_cb_default_size =
1666                 (attr_cb_default_size > attr_cb_size) ?
1667                         attr_cb_size : attr_cb_default_size;
1668         alpha_cb_size =
1669                 (alpha_cb_size > alpha_max_size_per_tpc) ?
1670                         alpha_max_size_per_tpc : alpha_cb_size;
1671         alpha_cb_default_size =
1672                 (alpha_cb_default_size > alpha_cb_size) ?
1673                         alpha_cb_size : alpha_cb_default_size;
1674
1675         attr_buffer_size =
1676                 (gr_gpc0_ppc0_cbm_cfg_size_granularity_v() * attr_cb_size +
1677                  gr_gpc0_ppc0_cbm_cfg2_size_granularity_v() * alpha_cb_size) *
1678                  gr->gpc_count;
1679
1680         nvhost_dbg_info("cb_buffer_size : %d", cb_buffer_size);
1681
1682         mem = mem_op().alloc(memmgr, cb_buffer_size,
1683                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1684                           DEFAULT_NVMAP_ALLOC_FLAGS,
1685                           NVMAP_HEAP_CARVEOUT_GENERIC);
1686         if (IS_ERR_OR_NULL(mem))
1687                 goto clean_up;
1688
1689         gr->global_ctx_buffer[CIRCULAR].ref = mem;
1690         gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size;
1691
1692         nvhost_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
1693
1694         mem = mem_op().alloc(memmgr, pagepool_buffer_size,
1695                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1696                           DEFAULT_NVMAP_ALLOC_FLAGS,
1697                           NVMAP_HEAP_CARVEOUT_GENERIC);
1698         if (IS_ERR_OR_NULL(mem))
1699                 goto clean_up;
1700
1701         gr->global_ctx_buffer[PAGEPOOL].ref = mem;
1702         gr->global_ctx_buffer[PAGEPOOL].size = pagepool_buffer_size;
1703
1704         nvhost_dbg_info("attr_buffer_size : %d", attr_buffer_size);
1705
1706         mem = mem_op().alloc(memmgr, attr_buffer_size,
1707                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1708                           DEFAULT_NVMAP_ALLOC_FLAGS,
1709                           NVMAP_HEAP_CARVEOUT_GENERIC);
1710         if (IS_ERR_OR_NULL(mem))
1711                 goto clean_up;
1712
1713         gr->global_ctx_buffer[ATTRIBUTE].ref = mem;
1714         gr->global_ctx_buffer[ATTRIBUTE].size = attr_buffer_size;
1715
1716         mem = mem_op().alloc(memmgr, attr_buffer_size,
1717                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1718                           DEFAULT_NVMAP_ALLOC_FLAGS,
1719                           NVMAP_HEAP_CARVEOUT_GENERIC); /* TBD: use NVMAP_HEAP_CARVEOUT_VPR */
1720         if (IS_ERR_OR_NULL(mem))
1721                 goto clean_up;
1722
1723         gr->global_ctx_buffer[ATTRIBUTE_VPR].ref = mem;
1724         gr->global_ctx_buffer[ATTRIBUTE_VPR].size = attr_buffer_size;
1725
1726         nvhost_dbg_info("golden_image_size : %d",
1727                    gr->ctx_vars.golden_image_size);
1728
1729         mem = mem_op().alloc(memmgr, gr->ctx_vars.golden_image_size,
1730                           DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1731                           DEFAULT_NVMAP_ALLOC_FLAGS,
1732                           NVMAP_HEAP_CARVEOUT_GENERIC);
1733         if (IS_ERR_OR_NULL(mem))
1734                 goto clean_up;
1735
1736         gr->global_ctx_buffer[GOLDEN_CTX].ref = mem;
1737         gr->global_ctx_buffer[GOLDEN_CTX].size =
1738                 gr->ctx_vars.golden_image_size;
1739
1740         nvhost_dbg_fn("done");
1741         return 0;
1742
1743  clean_up:
1744         nvhost_dbg(dbg_fn | dbg_err, "fail");
1745         for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
1746                 if (gr->global_ctx_buffer[i].ref) {
1747                         mem_op().put(memmgr,
1748                                 gr->global_ctx_buffer[i].ref);
1749                         memset(&gr->global_ctx_buffer[i],
1750                                 0, sizeof(struct mem_desc));
1751                 }
1752         }
1753         return -ENOMEM;
1754 }
1755
1756 static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g)
1757 {
1758         struct gr_gk20a *gr = &g->gr;
1759         struct mem_mgr *memmgr = mem_mgr_from_g(g);
1760         u32 i;
1761
1762         for (i = 0; i < NR_GLOBAL_CTX_BUF; i++) {
1763                 mem_op().put(memmgr, gr->global_ctx_buffer[i].ref);
1764                 memset(&gr->global_ctx_buffer[i], 0, sizeof(struct mem_desc));
1765         }
1766 }
1767
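/*
 * Map the global context buffers into the channel's GPU address space and
 * record the virtual addresses; VPR channels get the VPR attribute buffer
 * instead of the normal one.  On any failure, all mappings made so far
 * are undone.
 */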
1768 static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
1769                                         struct channel_gk20a *c)
1770 {
1771         struct vm_gk20a *ch_vm = c->vm;
1772         struct mem_mgr *memmgr = mem_mgr_from_g(g);
1773         struct mem_handle *handle_ref;
1774         u32 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
1775         struct gr_gk20a *gr = &g->gr;
1776         u64 gpu_va;
1777         u32 i;
1778         nvhost_dbg_fn("");
1779
1780         gpu_va = ch_vm->map(ch_vm, memmgr,
1781                             gr->global_ctx_buffer[CIRCULAR].ref,
1782                             0, 0, 0 /*offset_align, flags, kind*/);
1783         if (!gpu_va)
1784                 goto clean_up;
1785         g_bfr_va[CIRCULAR_VA] = gpu_va;
1786
1787         if (!c->vpr)
1788                 handle_ref = gr->global_ctx_buffer[ATTRIBUTE].ref;
1789         else
1790                 handle_ref = gr->global_ctx_buffer[ATTRIBUTE_VPR].ref;
1791
1792         gpu_va = ch_vm->map(ch_vm, memmgr, handle_ref,
1793                             0, 0, 0 /*offset_align, flags, kind*/);
1794         if (!gpu_va)
1795                 goto clean_up;
1796         g_bfr_va[ATTRIBUTE_VA] = gpu_va;
1797
1798         gpu_va = ch_vm->map(ch_vm, memmgr,
1799                             gr->global_ctx_buffer[PAGEPOOL].ref,
1800                             0, 0, 0/*offset_align, flags, kind*/);
1801         if (!gpu_va)
1802                 goto clean_up;
1803         g_bfr_va[PAGEPOOL_VA] = gpu_va;
1804
1805         gpu_va = ch_vm->map(ch_vm, memmgr,
1806                             gr->global_ctx_buffer[GOLDEN_CTX].ref,
1807                             0, 0, 0 /*offset_align, flags, kind*/);
1808         if (!gpu_va)
1809                 goto clean_up;
1810         g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
1811
1812         c->ch_ctx.global_ctx_buffer_mapped = true;
1813         return 0;
1814
1815  clean_up:
1816         for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
1817                 if (g_bfr_va[i]) {
1818                         ch_vm->unmap(ch_vm, g_bfr_va[i]);
1819                         g_bfr_va[i] = 0;
1820                 }
1821         }
1822         return -ENOMEM;
1823 }
1824
1825 static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
1826 {
1827         struct vm_gk20a *ch_vm = c->vm;
1828         u32 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
1829         u32 i;
1830
1831         nvhost_dbg_fn("");
1832
1833         for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
1834                 if (g_bfr_va[i]) {
1835                         ch_vm->unmap(ch_vm, g_bfr_va[i]);
1836                         g_bfr_va[i] = 0;
1837                 }
1838         }
1839         c->ch_ctx.global_ctx_buffer_mapped = false;
1840 }
1841
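/*
 * Allocate the per-channel graphics context buffer (sized to the golden
 * image) and map it into the channel's address space.
 */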
1842 static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
1843                                 struct channel_gk20a *c)
1844 {
1845         struct gr_gk20a *gr = &g->gr;
1846         struct gr_ctx_desc *gr_ctx = &c->ch_ctx.gr_ctx;
1847         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
1848         struct vm_gk20a *ch_vm = c->vm;
1849
1850         nvhost_dbg_fn("");
1851
1852         if (gr->ctx_vars.buffer_size == 0)
1853                 return 0;
1854
1855         /* alloc channel gr ctx buffer */
1856         gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
1857         gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
1858
1859         gr_ctx->mem.ref = mem_op().alloc(memmgr,
1860                                 gr->ctx_vars.buffer_total_size,
1861                                 DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1862                                 DEFAULT_NVMAP_ALLOC_FLAGS,
1863                                 NVMAP_HEAP_CARVEOUT_GENERIC);
1864
1865         if (IS_ERR(gr_ctx->mem.ref))
1866                 return -ENOMEM;
1867
1868         gr_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
1869                 gr_ctx->mem.ref, 0, 0, 0 /*offset_align, flags, kind*/);
1870         if (!gr_ctx->gpu_va) {
1871                 mem_op().put(memmgr, gr_ctx->mem.ref);
1872                 return -ENOMEM;
1873         }
1874
1875         return 0;
1876 }
1877
1878 static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
1879 {
1880         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1881         struct mem_mgr *ch_nvmap = gk20a_channel_mem_mgr(c);
1882         struct vm_gk20a *ch_vm = c->vm;
1883
1884         nvhost_dbg_fn("");
1885
1886         ch_vm->unmap(ch_vm, ch_ctx->gr_ctx.gpu_va);
1887         mem_op().put(ch_nvmap, ch_ctx->gr_ctx.mem.ref);
1888 }
1889
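/*
 * Allocate and map the per-channel patch context buffer (128 words); its
 * GPU VA and entry count are later written into the context image when
 * the golden image is loaded.
 */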
1890 static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
1891                                 struct channel_gk20a *c)
1892 {
1893         struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
1894         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
1895         struct vm_gk20a *ch_vm = c->vm;
1896
1897         nvhost_dbg_fn("");
1898
1899         patch_ctx->mem.ref = mem_op().alloc(memmgr, 128 * sizeof(u32),
1900                                 DEFAULT_NVMAP_ALLOC_ALIGNMENT,
1901                                 DEFAULT_NVMAP_ALLOC_FLAGS,
1902                                 NVMAP_HEAP_CARVEOUT_GENERIC);
1903         if (IS_ERR(patch_ctx->mem.ref))
1904                 return -ENOMEM;
1905
1906         patch_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
1907                                 patch_ctx->mem.ref,
1908                                 0, 0, 0 /*offset_align, flags, kind*/);
1909         if (!patch_ctx->gpu_va)
1910                 goto clean_up;
1911
1912         nvhost_dbg_fn("done");
1913         return 0;
1914
1915  clean_up:
1916         nvhost_dbg(dbg_fn | dbg_err, "fail");
1917         if (patch_ctx->mem.ref) {
1918                 mem_op().put(memmgr, patch_ctx->mem.ref);
1919                 patch_ctx->mem.ref = 0;
1920         }
1921
1922         return -ENOMEM;
1923 }
1924
1925 static void gr_gk20a_unmap_channel_patch_ctx(struct channel_gk20a *c)
1926 {
1927         struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
1928         struct vm_gk20a *ch_vm = c->vm;
1929
1930         nvhost_dbg_fn("");
1931
1932         ch_vm->unmap(ch_vm, patch_ctx->gpu_va);
1933         patch_ctx->gpu_va = 0;
1934         patch_ctx->data_count = 0;
1935 }
1936
1937 static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c)
1938 {
1939         struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
1940         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
1941
1942         nvhost_dbg_fn("");
1943
1944         gr_gk20a_unmap_channel_patch_ctx(c);
1945
1946         if (patch_ctx->mem.ref) {
1947                 mem_op().put(memmgr, patch_ctx->mem.ref);
1948                 patch_ctx->mem.ref = 0;
1949         }
1950 }
1951
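/*
 * Tear down all context state for a channel: unmap the global buffers,
 * free the patch and gr context buffers and clear the channel's context
 * bookkeeping.
 */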
1952 void gk20a_free_channel_ctx(struct channel_gk20a *c)
1953 {
1954         gr_gk20a_unmap_global_ctx_buffers(c);
1955         gr_gk20a_free_channel_patch_ctx(c);
1956         gr_gk20a_free_channel_gr_ctx(c);
1957
1958         /* zcull_ctx, pm_ctx */
1959
1960         memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
1961
1962         c->num_objects = 0;
1963         c->first_init = false;
1964 }
1965
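/*
 * Allocate an object context on a channel: validate the class, allocate
 * and commit the gr ctx buffer, allocate the patch buffer, map and commit
 * the global buffers, and initialize/load the golden context image on
 * first use.
 */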
1966 int gk20a_alloc_obj_ctx(struct channel_gk20a  *c,
1967                         struct nvhost_alloc_obj_ctx_args *args)
1968 {
1969         struct gk20a *g = c->g;
1970         struct gr_gk20a *gr = &g->gr;
1971         struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
1972         bool change_to_compute_mode = false;
1973         int err = 0;
1974
1975         nvhost_dbg_fn("");
1976
1977         /* an address space needs to have been bound at this point.*/
1978         if (!gk20a_channel_as_bound(c)) {
1979                 nvhost_err(dev_from_gk20a(g),
1980                            "not bound to address space at time"
1981                            " of grctx allocation");
1982                 return -EINVAL;
1983         }
1984
1985         switch (args->class_num) {
1986         case KEPLER_COMPUTE_A:
1987                 /* tbd: NV2080_CTRL_GPU_COMPUTE_MODE_RULES_EXCLUSIVE_COMPUTE */
1988                 /* tbd: PDB_PROP_GRAPHICS_DISTINCT_3D_AND_COMPUTE_STATE_DEF  */
1989                 change_to_compute_mode = true;
1990                 break;
1991         case KEPLER_C:
1992         case FERMI_TWOD_A:
1993         case KEPLER_DMA_COPY_A:
1994                 break;
1995
1996         default:
1997                 nvhost_err(dev_from_gk20a(g),
1998                            "invalid obj class 0x%x", args->class_num);
1999                 err = -EINVAL;
2000                 goto out;
2001         }
2002
2003         /* allocate gr ctx buffer */
2004         if (ch_ctx->gr_ctx.mem.ref == NULL) {
2005                 err = gr_gk20a_alloc_channel_gr_ctx(g, c);
2006                 if (err) {
2007                         nvhost_err(dev_from_gk20a(g),
2008                                 "fail to allocate gr ctx buffer");
2009                         goto out;
2010                 }
2011         } else {
2012                 /* TBD: needs to be more subtle about which is being allocated,
2013                  * as some are allowed to be allocated along the same channel */
2014                 nvhost_err(dev_from_gk20a(g),
2015                         "too many classes alloc'd on same channel");
2016                 err = -EINVAL;
2017                 goto out;
2018         }
2019
2020         /* commit gr ctx buffer */
2021         err = gr_gk20a_commit_inst(c, ch_ctx->gr_ctx.gpu_va);
2022         if (err) {
2023                 nvhost_err(dev_from_gk20a(g),
2024                         "fail to commit gr ctx buffer");
2025                 goto out;
2026         }
2027
2028         /* set misc. might be possible to move around later */
2029         ch_ctx->pm_ctx.ctx_sw_mode =
2030                 ctxsw_prog_main_image_pm_mode_no_ctxsw_v();
2031
2032         /* allocate patch buffer */
2033         if (ch_ctx->patch_ctx.mem.ref == NULL) {
2034                 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
2035                 if (err) {
2036                         nvhost_err(dev_from_gk20a(g),
2037                                 "fail to allocate patch buffer");
2038                         goto out;
2039                 }
2040         }
2041
2042         /* map global buffer to channel gpu_va and commit */
2043         if (!ch_ctx->global_ctx_buffer_mapped) {
2044                 err = gr_gk20a_map_global_ctx_buffers(g, c);
2045                 if (err) {
2046                         nvhost_err(dev_from_gk20a(g),
2047                                 "fail to map global ctx buffer");
2048                         goto out;
2049                 }
2050                 gr_gk20a_elpg_protected_call(g,
2051                         gr_gk20a_commit_global_ctx_buffers(g, c, 1));
2052         }
2053
2054         /* init golden image; ELPG is enabled after this is done */
2055         if (!gr->ctx_vars.golden_image_initialized) {
2056                 err = gr_gk20a_init_golden_ctx_image(g, c);
2057                 if (err) {
2058                         nvhost_err(dev_from_gk20a(g),
2059                                 "fail to init golden ctx image");
2060                         goto out;
2061                 }
2062         }
2063
2064         /* load golden image */
2065         if (!c->first_init) {
2066                 err = gr_gk20a_elpg_protected_call(g,
2067                         gr_gk20a_load_golden_ctx_image(g, c));
2068                 if (err) {
2069                         nvhost_err(dev_from_gk20a(g),
2070                                 "fail to load golden ctx image");
2071                         goto out;
2072                 }
2073                 c->first_init = true;
2074         }
2075
2076         c->num_objects++;
2077
2078         nvhost_dbg_fn("done");
2079         return 0;
2080 out:
2081         /* 1. gr_ctx, patch_ctx and global ctx buffer mapping
2082            can be reused so no need to release them.
2083            2. golden image init and load is a one time thing so if
2084            they pass, no need to undo. */
2085         nvhost_dbg(dbg_fn | dbg_err, "fail");
2086         return err;
2087 }
2088
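/*
 * Drop one object context reference; when the last one goes away the
 * patch context mapping is released and first_init is reset.
 */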
2089 int gk20a_free_obj_ctx(struct channel_gk20a  *c,
2090                        struct nvhost_free_obj_ctx_args *args)
2091 {
2092         nvhost_dbg_fn("");
2093
2094         if (c->num_objects == 0)
2095                 return 0;
2096
2097         c->num_objects--;
2098
2099         if (c->num_objects == 0) {
2100                 c->first_init = false;
2101                 gr_gk20a_unmap_channel_patch_ctx(c);
2102         }
2103
2104         return 0;
2105 }
2106
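/*
 * Tear down GR software state: free the global ctx buffers, unpin and
 * release the MMU and compbit backing stores, and free the floorsweeping
 * and ucode/ctxsw bookkeeping arrays.
 */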
2107 static void gk20a_remove_gr_support(struct gk20a *g, struct gr_gk20a *gr)
2108 {
2109         struct mem_mgr *memmgr = mem_mgr_from_g(g);
2110
2111         nvhost_dbg_fn("");
2112
2113         gr_gk20a_free_global_ctx_buffers(g);
2114
2115         mem_op().unpin(memmgr, gr->mmu_wr_mem.mem.ref);
2116         mem_op().unpin(memmgr, gr->mmu_rd_mem.mem.ref);
2117         mem_op().unpin(memmgr, gr->compbit_store.mem.ref);
2118         mem_op().put(memmgr, gr->mmu_wr_mem.mem.ref);
2119         mem_op().put(memmgr, gr->mmu_rd_mem.mem.ref);
2120         mem_op().put(memmgr, gr->compbit_store.mem.ref);
2121         kfree(gr->gpc_tpc_count);
2122         kfree(gr->gpc_ppc_count);
2123         kfree(gr->pes_tpc_count[0]);
2124         kfree(gr->pes_tpc_count[1]);
2125         kfree(gr->pes_tpc_mask[0]);
2126         kfree(gr->pes_tpc_mask[1]);
2127         kfree(gr->gpc_skip_mask);
2128         kfree(gr->temp_ctx_header);
2129         kfree(gr->ctx_vars.ucode.fecs.inst.l);
2130         kfree(gr->ctx_vars.ucode.fecs.data.l);
2131         kfree(gr->ctx_vars.ucode.gpccs.inst.l);
2132         kfree(gr->ctx_vars.ucode.gpccs.data.l);
2133         kfree(gr->ctx_vars.sw_bundle_init.l);
2134         kfree(gr->ctx_vars.sw_method_init.l);
2135         kfree(gr->ctx_vars.sw_ctx_load.l);
2136         kfree(gr->ctx_vars.sw_non_ctx_load.l);
2137         kfree(gr->ctx_vars.ctxsw_regs.sys.l);
2138         kfree(gr->ctx_vars.ctxsw_regs.gpc.l);
2139         kfree(gr->ctx_vars.ctxsw_regs.tpc.l);
2140         kfree(gr->ctx_vars.ctxsw_regs.zcull_gpc.l);
2141         kfree(gr->ctx_vars.ctxsw_regs.ppc.l);
2142         kfree(gr->ctx_vars.ctxsw_regs.pm_sys.l);
2143         kfree(gr->ctx_vars.ctxsw_regs.pm_gpc.l);
2144         kfree(gr->ctx_vars.ctxsw_regs.pm_tpc.l);
2145
2146         memset(&gr->mmu_wr_mem, 0, sizeof(struct mem_desc));
2147         memset(&gr->mmu_rd_mem, 0, sizeof(struct mem_desc));
2148         memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));
2149         gr->gpc_tpc_count = NULL;
2150         gr->gpc_ppc_count = NULL;
2151         gr->pes_tpc_count[0] = NULL;
2152         gr->pes_tpc_count[1] = NULL;
2153         gr->pes_tpc_mask[0] = NULL;
2154         gr->pes_tpc_mask[1] = NULL;
2155         gr->gpc_skip_mask = NULL;
2156         gr->temp_ctx_header = NULL;
2157
2158         nvhost_allocator_destroy(&gr->comp_tags);
2159
2160         /*tbd*/
2161 }
2162
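/*
 * Read the floorswept GR configuration (FBP/GPC/TPC/ZCULL/PES counts and
 * masks) from hardware, derive per-GPC skip masks for unbalanced PES
 * configurations and set the circular buffer defaults.
 */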
2163 static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
2164 {
2165         u32 gpc_index, pes_index;
2166         u32 pes_tpc_mask;
2167         u32 pes_tpc_count;
2168         u32 pes_heavy_index;
2169         u32 gpc_new_skip_mask;
2170         u32 tmp;
2171
2172         tmp = gk20a_readl(g, pri_ringmaster_enum_fbp_r());
2173         gr->num_fbps = pri_ringmaster_enum_fbp_count_v(tmp);
2174
2175         tmp = gk20a_readl(g, top_num_gpcs_r());
2176         gr->max_gpc_count = top_num_gpcs_value_v(tmp);
2177
2178         tmp = gk20a_readl(g, top_num_fbps_r());
2179         gr->max_fbps_count = top_num_fbps_value_v(tmp);
2180
2181         tmp = gk20a_readl(g, top_tpc_per_gpc_r());
2182         gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
2183
2184         gr->max_tpc_count = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
2185
2186         tmp = gk20a_readl(g, top_num_fbps_r());
2187         gr->sys_count = top_num_fbps_value_v(tmp);
2188
2189         tmp = gk20a_readl(g, pri_ringmaster_enum_gpc_r());
2190         gr->gpc_count = pri_ringmaster_enum_gpc_count_v(tmp);
2191
2192         gr->pe_count_per_gpc = proj_scal_litter_num_pes_per_gpc_v();
2193         gr->max_zcull_per_gpc_count = proj_scal_litter_num_zcull_banks_v();
2194
2195         if (!gr->gpc_count) {
2196                 nvhost_err(dev_from_gk20a(g), "gpc_count==0!");
2197                 goto clean_up;
2198         }
2199
2200         gr->gpc_tpc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2201         gr->gpc_zcb_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2202         gr->gpc_ppc_count = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2203         gr->pes_tpc_count[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2204         gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2205         gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2206         gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
2207         gr->gpc_skip_mask =
2208                 kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
2209                         GFP_KERNEL);
2210
2211         if (!gr->gpc_tpc_count || !gr->gpc_zcb_count || !gr->gpc_ppc_count ||
2212             !gr->pes_tpc_count[0] || !gr->pes_tpc_count[1] ||
2213             !gr->pes_tpc_mask[0] || !gr->pes_tpc_mask[1] || !gr->gpc_skip_mask)
2214                 goto clean_up;
2215
2216         gr->ppc_count = 0;
2217         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2218                 tmp = gk20a_readl(g, gr_gpc0_fs_gpc_r());
2219
2220                 gr->gpc_tpc_count[gpc_index] =
2221                         gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
2222                 gr->tpc_count += gr->gpc_tpc_count[gpc_index];
2223
2224                 gr->gpc_zcb_count[gpc_index] =
2225                         gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
2226                 gr->zcb_count += gr->gpc_zcb_count[gpc_index];
2227
2228                 gr->gpc_ppc_count[gpc_index] = gr->pe_count_per_gpc;
2229                 gr->ppc_count += gr->gpc_ppc_count[gpc_index];
2230                 for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) {
2231
2232                         tmp = gk20a_readl(g,
2233                                 gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) +
2234                                 gpc_index * proj_gpc_stride_v());
2235
2236                         pes_tpc_mask = gr_gpc0_gpm_pd_pes_tpc_id_mask_mask_v(tmp);
2237                         pes_tpc_count = count_bits(pes_tpc_mask);
2238
2239                         gr->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
2240                         gr->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
2241                 }
2242
2243                 gpc_new_skip_mask = 0;
2244                 if (gr->pes_tpc_count[0][gpc_index] +
2245                     gr->pes_tpc_count[1][gpc_index] == 5) {
2246                         pes_heavy_index =
2247                                 gr->pes_tpc_count[0][gpc_index] >
2248                                 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
2249
2250                         gpc_new_skip_mask =
2251                                 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
2252                                    (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
2253                                    (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
2254
2255                 } else if ((gr->pes_tpc_count[0][gpc_index] +
2256                             gr->pes_tpc_count[1][gpc_index] == 4) &&
2257                            (gr->pes_tpc_count[0][gpc_index] !=
2258                             gr->pes_tpc_count[1][gpc_index])) {
2259                         pes_heavy_index =
2260                                 gr->pes_tpc_count[0][gpc_index] >
2261                                 gr->pes_tpc_count[1][gpc_index] ? 0 : 1;
2262
2263                         gpc_new_skip_mask =
2264                                 gr->pes_tpc_mask[pes_heavy_index][gpc_index] ^
2265                                    (gr->pes_tpc_mask[pes_heavy_index][gpc_index] &
2266                                    (gr->pes_tpc_mask[pes_heavy_index][gpc_index] - 1));
2267                 }
2268                 gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
2269         }
2270
2271         nvhost_dbg_info("fbps: %d", gr->num_fbps);
2272         nvhost_dbg_info("max_gpc_count: %d", gr->max_gpc_count);
2273         nvhost_dbg_info("max_fbps_count: %d", gr->max_fbps_count);
2274         nvhost_dbg_info("max_tpc_per_gpc_count: %d", gr->max_tpc_per_gpc_count);
2275         nvhost_dbg_info("max_zcull_per_gpc_count: %d", gr->max_zcull_per_gpc_count);
2276         nvhost_dbg_info("max_tpc_count: %d", gr->max_tpc_count);
2277         nvhost_dbg_info("sys_count: %d", gr->sys_count);
2278         nvhost_dbg_info("gpc_count: %d", gr->gpc_count);
2279         nvhost_dbg_info("pe_count_per_gpc: %d", gr->pe_count_per_gpc);
2280         nvhost_dbg_info("tpc_count: %d", gr->tpc_count);
2281         nvhost_dbg_info("ppc_count: %d", gr->ppc_count);
2282
2283         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2284                 nvhost_dbg_info("gpc_tpc_count[%d] : %d",
2285                            gpc_index, gr->gpc_tpc_count[gpc_index]);
2286         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2287                 nvhost_dbg_info("gpc_zcb_count[%d] : %d",
2288                            gpc_index, gr->gpc_zcb_count[gpc_index]);
2289         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2290                 nvhost_dbg_info("gpc_ppc_count[%d] : %d",
2291                            gpc_index, gr->gpc_ppc_count[gpc_index]);
2292         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2293                 nvhost_dbg_info("gpc_skip_mask[%d] : %d",
2294                            gpc_index, gr->gpc_skip_mask[gpc_index]);
2295         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2296                 for (pes_index = 0;
2297                      pes_index < gr->pe_count_per_gpc;
2298                      pes_index++)
2299                         nvhost_dbg_info("pes_tpc_count[%d][%d] : %d",
2300                                    pes_index, gpc_index,
2301                                    gr->pes_tpc_count[pes_index][gpc_index]);
2302
2303         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2304                 for (pes_index = 0;
2305                      pes_index < gr->pe_count_per_gpc;
2306                      pes_index++)
2307                         nvhost_dbg_info("pes_tpc_mask[%d][%d] : %d",
2308                                    pes_index, gpc_index,
2309                                    gr->pes_tpc_mask[pes_index][gpc_index]);
2310
2311         gr->bundle_cb_default_size = gr_scc_bundle_cb_size_div_256b__prod_v();
2312         gr->min_gpm_fifo_depth = gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
2313         gr->bundle_cb_token_limit = gr_pd_ab_dist_cfg2_token_limit_init_v();
2314         gr->attrib_cb_default_size = gr_gpc0_ppc0_cbm_cfg_size_default_v();
2315         /* gk20a has a fixed beta CB RAM, don't alloc more */
2316         gr->attrib_cb_size = gr->attrib_cb_default_size;
2317         gr->alpha_cb_default_size = gr_gpc0_ppc0_cbm_cfg2_size_default_v();
2318         gr->alpha_cb_size = gr->alpha_cb_default_size + (gr->alpha_cb_default_size >> 1);
2319         gr->timeslice_mode = gr_gpcs_ppcs_cbm_cfg_timeslice_mode_enable_v();
2320
2321         nvhost_dbg_info("bundle_cb_default_size: %d",
2322                    gr->bundle_cb_default_size);
2323         nvhost_dbg_info("min_gpm_fifo_depth: %d", gr->min_gpm_fifo_depth);
2324         nvhost_dbg_info("bundle_cb_token_limit: %d", gr->bundle_cb_token_limit);
2325         nvhost_dbg_info("attrib_cb_default_size: %d",
2326                    gr->attrib_cb_default_size);
2327         nvhost_dbg_info("attrib_cb_size: %d", gr->attrib_cb_size);
2328         nvhost_dbg_info("alpha_cb_default_size: %d", gr->alpha_cb_default_size);
2329         nvhost_dbg_info("alpha_cb_size: %d", gr->alpha_cb_size);
2330         nvhost_dbg_info("timeslice_mode: %d", gr->timeslice_mode);
2331
2332         return 0;
2333
2334 clean_up:
2335         return -ENOMEM;
2336 }
2337
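/*
 * Allocate, zero out and pin a pair of 4KB buffers tracked as
 * gr->mmu_wr_mem and gr->mmu_rd_mem.
 */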
2338 static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
2339 {
2340         struct mem_mgr *memmgr = mem_mgr_from_g(g);
2341         void *mmu_ptr;
2342
2343         gr->mmu_wr_mem_size = gr->mmu_rd_mem_size = 0x1000;
2344
2345         gr->mmu_wr_mem.mem.ref = mem_op().alloc(memmgr, gr->mmu_wr_mem_size,
2346                                              DEFAULT_NVMAP_ALLOC_ALIGNMENT,
2347                                              DEFAULT_NVMAP_ALLOC_FLAGS,
2348                                              NVMAP_HEAP_CARVEOUT_GENERIC);
2349         if (!gr->mmu_wr_mem.mem.ref)
2350                 goto clean_up;
2351         gr->mmu_wr_mem.mem.size = gr->mmu_wr_mem_size;
2352
2353         gr->mmu_rd_mem.mem.ref = mem_op().alloc(memmgr, gr->mmu_rd_mem_size,
2354                                              DEFAULT_NVMAP_ALLOC_ALIGNMENT,
2355                                              DEFAULT_NVMAP_ALLOC_FLAGS,
2356                                              NVMAP_HEAP_CARVEOUT_GENERIC);
2357         if (!gr->mmu_rd_mem.mem.ref)
2358                 goto clean_up;
2359         gr->mmu_rd_mem.mem.size = gr->mmu_rd_mem_size;
2360
2361         mmu_ptr = mem_op().mmap(gr->mmu_wr_mem.mem.ref);
2362         if (!mmu_ptr)
2363                 goto clean_up;
2364         memset(mmu_ptr, 0, gr->mmu_wr_mem.mem.size);
2365         mem_op().munmap(gr->mmu_wr_mem.mem.ref, mmu_ptr);
2366
2367         mmu_ptr = mem_op().mmap(gr->mmu_rd_mem.mem.ref);
2368         if (!mmu_ptr)
2369                 goto clean_up;
2370         memset(mmu_ptr, 0, gr->mmu_rd_mem.mem.size);
2371         mem_op().munmap(gr->mmu_rd_mem.mem.ref, mmu_ptr);
2372
2373         gr->mmu_wr_mem.cpu_pa = mem_op().pin(memmgr, gr->mmu_wr_mem.mem.ref);
2374         if (gr->mmu_wr_mem.cpu_pa == -EINVAL || gr->mmu_wr_mem.cpu_pa == -EINTR)
2375                 goto clean_up;
2376
2377         gr->mmu_rd_mem.cpu_pa = mem_op().pin(memmgr, gr->mmu_rd_mem.mem.ref);
2378         if (gr->mmu_rd_mem.cpu_pa == -EINVAL || gr->mmu_rd_mem.cpu_pa == -EINTR)
2379                 goto clean_up;
2380
2381         return 0;
2382
2383 clean_up:
2384         return -ENOMEM;
2385 }
2386
2387 static u32 prime_set[18] = {
2388         2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
2389
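/*
 * Build the screen-tile to GPC mapping: map_row_offset is picked as a
 * small prime not dividing the TPC count (with fixed overrides for a few
 * TPC counts), and map_tiles spreads tiles across GPCs in proportion to
 * each GPC's TPC count using an error-diffusion style accumulator.
 */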
2390 static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
2391 {
2392         s32 comm_denom;
2393         s32 mul_factor;
2394         s32 *init_frac = NULL;
2395         s32 *init_err = NULL;
2396         s32 *run_err = NULL;
2397         s32 *sorted_num_tpcs = NULL;
2398         s32 *sorted_to_unsorted_gpc_map = NULL;
2399         u32 gpc_index;
2400         u32 gpc_mark = 0;
2401         u32 num_tpc;
2402         u32 max_tpc_count = 0;
2403         u32 swap;
2404         u32 tile_count;
2405         u32 index;
2406         bool delete_map = false;
2407         bool gpc_sorted;
2408         int ret = 0;
2409
2410         init_frac = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2411         init_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2412         run_err = kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2413         sorted_num_tpcs =
2414                 kzalloc(proj_scal_max_gpcs_v() *
2415                         proj_scal_max_tpc_per_gpc_v() * sizeof(s32),
2416                         GFP_KERNEL);
2417         sorted_to_unsorted_gpc_map =
2418                 kzalloc(proj_scal_max_gpcs_v() * sizeof(s32), GFP_KERNEL);
2419
2420         if (!(init_frac && init_err && run_err && sorted_num_tpcs &&
2421               sorted_to_unsorted_gpc_map)) {
2422                 ret = -ENOMEM;
2423                 goto clean_up;
2424         }
2425
2426         gr->map_row_offset = INVALID_SCREEN_TILE_ROW_OFFSET;
2427
2428         if (gr->tpc_count == 3)
2429                 gr->map_row_offset = 2;
2430         else if (gr->tpc_count < 3)
2431                 gr->map_row_offset = 1;
2432         else {
2433                 gr->map_row_offset = 3;
2434
2435                 for (index = 1; index < 18; index++) {
2436                         u32 prime = prime_set[index];
2437                         if ((gr->tpc_count % prime) != 0) {
2438                                 gr->map_row_offset = prime;
2439                                 break;
2440                         }
2441                 }
2442         }
2443
2444         switch (gr->tpc_count) {
2445         case 15:
2446                 gr->map_row_offset = 6;
2447                 break;
2448         case 14:
2449                 gr->map_row_offset = 5;
2450                 break;
2451         case 13:
2452                 gr->map_row_offset = 2;
2453                 break;
2454         case 11:
2455                 gr->map_row_offset = 7;
2456                 break;
2457         case 10:
2458                 gr->map_row_offset = 6;
2459                 break;
2460         case 7:
2461         case 5:
2462                 gr->map_row_offset = 1;
2463                 break;
2464         default:
2465                 break;
2466         }
2467
2468         if (gr->map_tiles) {
2469                 if (gr->map_tile_count != gr->tpc_count)
2470                         delete_map = true;
2471
2472                 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
2473                         if ((u32)gr->map_tiles[tile_count] >= gr->tpc_count)
2474                                 delete_map = true;
2475                 }
2476
2477                 if (delete_map) {
2478                         kfree(gr->map_tiles);
2479                         gr->map_tiles = NULL;
2480                         gr->map_tile_count = 0;
2481                 }
2482         }
2483
2484         if (gr->map_tiles == NULL) {
2485                 gr->map_tile_count = proj_scal_max_gpcs_v();
2486
2487                 gr->map_tiles = kzalloc(proj_scal_max_gpcs_v() * sizeof(u8), GFP_KERNEL);
2488                 if (gr->map_tiles == NULL) {
2489                         ret = -ENOMEM;
2490                         goto clean_up;
2491                 }
2492
2493                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2494                         sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
2495                         sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
2496                 }
2497
2498                 gpc_sorted = false;
2499                 while (!gpc_sorted) {
2500                         gpc_sorted = true;
2501                         for (gpc_index = 0; gpc_index < gr->gpc_count - 1; gpc_index++) {
2502                                 if (sorted_num_tpcs[gpc_index + 1] > sorted_num_tpcs[gpc_index]) {
2503                                         gpc_sorted = false;
2504                                         swap = sorted_num_tpcs[gpc_index];
2505                                         sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1];
2506                                         sorted_num_tpcs[gpc_index + 1] = swap;
2507                                         swap = sorted_to_unsorted_gpc_map[gpc_index];
2508                                         sorted_to_unsorted_gpc_map[gpc_index] =
2509                                                 sorted_to_unsorted_gpc_map[gpc_index + 1];
2510                                         sorted_to_unsorted_gpc_map[gpc_index + 1] = swap;
2511                                 }
2512                         }
2513                 }
2514
2515                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
2516                         if (gr->gpc_tpc_count[gpc_index] > max_tpc_count)
2517                                 max_tpc_count = gr->gpc_tpc_count[gpc_index];
2518
2519                 mul_factor = gr->gpc_count * max_tpc_count;
2520                 if (mul_factor & 0x1)
2521                         mul_factor = 2;
2522                 else
2523                         mul_factor = 1;
2524
2525                 comm_denom = gr->gpc_count * max_tpc_count * mul_factor;
2526
2527                 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2528                         num_tpc = sorted_num_tpcs[gpc_index];
2529
2530                         init_frac[gpc_index] = num_tpc * gr->gpc_count * mul_factor;
2531
2532                         if (num_tpc != 0)
2533                                 init_err[gpc_index] = gpc_index * max_tpc_count * mul_factor - comm_denom/2;
2534                         else
2535                                 init_err[gpc_index] = 0;
2536
2537                         run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
2538                 }
2539
2540                 while (gpc_mark < gr->tpc_count) {
2541                         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
2542                                 if ((run_err[gpc_index] * 2) >= comm_denom) {
2543                                         gr->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
2544                                         run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
2545                                 } else
2546                                         run_err[gpc_index] += init_frac[gpc_index];
2547                         }
2548                 }
2549         }
2550
2551 clean_up:
2552         kfree(init_frac);
2553         kfree(init_err);
2554         kfree(run_err);
2555         kfree(sorted_num_tpcs);
2556         kfree(sorted_to_unsorted_gpc_map);
2557
2558         if (ret)
2559                 nvhost_dbg(dbg_fn | dbg_err, "fail");
2560         else
2561                 nvhost_dbg_fn("done");
2562
2563         return ret;
2564 }
2565
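/*
 * Size and allocate the compression tag (comptag) backing store: one tag
 * line covers 128KB, the line count is clamped to the HW maximum, and the
 * backing size is padded for per-FBP alignment and rounded up to 64KB.
 * An allocator is then initialized over the available tag lines.
 */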
2566 static int gr_gk20a_init_comptag(struct gk20a *g, struct gr_gk20a *gr)
2567 {
2568         struct mem_mgr *memmgr = mem_mgr_from_g(g);
2569
2570         /* max memory size (MB) to cover */
2571         u32 max_size = gr->max_comptag_mem;
2572         /* one tag line covers 128KB */
2573         u32 max_comptag_lines = max_size << 3;
2574
2575         u32 hw_max_comptag_lines =
2576                 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();
2577
2578         u32 cbc_param =
2579                 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
2580         u32 comptags_per_cacheline =
2581                 ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
2582         u32 slices_per_fbp =
2583                 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(cbc_param);
2584         u32 cacheline_size =
2585                 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
2586
2587         u32 compbit_backing_size;
2588         int ret = 0;
2589
2590         nvhost_dbg_fn("");
2591
2592         if (max_comptag_lines == 0) {
2593                 gr->compbit_store.mem.size = 0;
2594                 return 0;
2595         }
2596
2597         if (max_comptag_lines > hw_max_comptag_lines)
2598                 max_comptag_lines = hw_max_comptag_lines;
2599
2600         /* no hybrid fb */
2601         compbit_backing_size =
2602                 DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
2603                 cacheline_size * slices_per_fbp * gr->num_fbps;
2604
2605         /* aligned to 2KB * num_fbps */
2606         compbit_backing_size +=
2607                 gr->num_fbps << ltc_ltc0_lts0_cbc_base_alignment_shift_v();
2608
2609         /* must be a multiple of 64KB */
2610         compbit_backing_size = roundup(compbit_backing_size, 64*1024);
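        /*
         * Rough illustration with assumed (not read-back) parameters:
         * comptags_per_cacheline = 32, cacheline_size = 2048, slices_per_fbp
         * = 2 and one FBP turn 8192 comptag lines into
         * DIV_ROUND_UP(8192, 32) * 2048 * 2 * 1 = 1 MB, on top of which the
         * 2 KB * num_fbps pad and the 64 KB roundup above are applied.
         */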
2611
2612         max_comptag_lines =
2613                 (compbit_backing_size * comptags_per_cacheline) /
2614                 (cacheline_size * slices_per_fbp * gr->num_fbps);
2615
2616         if (max_comptag_lines > hw_max_comptag_lines)
2617                 max_comptag_lines = hw_max_comptag_lines;
2618
2619         nvhost_dbg_info("compbit backing store size : %d",
2620                 compbit_backing_size);
2621         nvhost_dbg_info("max comptag lines : %d",
2622                 max_comptag_lines);
2623
2624         gr->compbit_store.mem.ref =
2625                 mem_op().alloc(memmgr, compbit_backing_size,
2626                             DEFAULT_NVMAP_ALLOC_ALIGNMENT,
2627                             DEFAULT_NVMAP_ALLOC_FLAGS,
2628                             NVMAP_HEAP_CARVEOUT_GENERIC);
2629         if (IS_ERR_OR_NULL(gr->compbit_store.mem.ref)) {
2630                 nvhost_err(dev_from_gk20a(g), "failed to allocate "
2631                            "backing store for compbit: size %d",
2632                            compbit_backing_size);
2633                 return -ENOMEM;
2634         }
2635         gr->compbit_store.mem.size = compbit_backing_size;
2636
2637         gr->compbit_store.base_pa =
2638                 mem_op().pin(memmgr, gr->compbit_store.mem.ref);
2639         if (gr->compbit_store.base_pa == -EINVAL ||
2640             gr->compbit_store.base_pa == -EINTR) {
2641                 ret = -ENOMEM;
2642                 goto clean_up;
2643         }
2644
2645         nvhost_allocator_init(&gr->comp_tags, "comptag",
2646                         1, max_comptag_lines, 1);
2647
2648 clean_up:
2649         mem_op().put(memmgr, gr->compbit_store.mem.ref);
2650         return ret;
2651 }
2652
2653 int gk20a_gr_clear_comptags(struct gk20a *g, u32 min, u32 max)
2654 {
2655         struct gr_gk20a *gr = &g->gr;
2656         u32 fbp, slice, ctrl1, val;
2657         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
2658         u32 slices_per_fbp =
2659                 ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
2660                         gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
2661
2662         nvhost_dbg_fn("");
2663
2664         if (gr->compbit_store.mem.size == 0)
2665                 return 0;
2666
2667         gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
2668                      ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
2669         gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
2670                      ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
2671         gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
2672                      gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) |
2673                      ltc_ltcs_ltss_cbc_ctrl1_clear_active_f());
2674
2675         for (fbp = 0; fbp < gr->num_fbps; fbp++) {
2676                 for (slice = 0; slice < slices_per_fbp; slice++) {
2677                         ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
2678                                 fbp * proj_ltc_pri_stride_v() +
2679                                 slice * proj_lts_pri_stride_v();
2680
2681                         do {
2682                                 u32 check = min_t(u32,
2683                                         GR_IDLE_CHECK_PERIOD, timeout);
2684
2685                                 val = gk20a_readl(g, ctrl1);
2686                                 if (ltc_ltc0_lts0_cbc_ctrl1_clear_v(val) !=
2687                                     ltc_ltc0_lts0_cbc_ctrl1_clear_active_v())
2688                                         break;
2689
2690                                 udelay(GR_IDLE_CHECK_PERIOD);
2691                                 timeout -= check;
2692
2693                         } while (timeout);
2694
2695                         if (timeout == 0) {
2696                                 nvhost_err(dev_from_gk20a(g),
2697                                            "comp tag clear timeout\n");
2698                                 return -EBUSY;
2699                         }
2700                 }
2701         }
2702
2703         return 0;
2704 }
2705
2706 static int gr_gk20a_init_zcull(struct gk20a *g, struct gr_gk20a *gr)
2707 {
2708         struct gr_zcull_gk20a *zcull = &gr->zcull;
2709
2710         zcull->aliquot_width = gr->tpc_count * 16;
2711         zcull->aliquot_height = 16;
2712
2713         zcull->width_align_pixels = gr->tpc_count * 16;
2714         zcull->height_align_pixels = 32;
2715
2716         zcull->aliquot_size =
2717                 zcull->aliquot_width * zcull->aliquot_height;
2718
2719         /* assume no floor sweeping since we only have 1 tpc in 1 gpc */
2720         zcull->pixel_squares_by_aliquots =
2721                 gr->zcb_count * 16 * 16 * gr->tpc_count /
2722                 (gr->gpc_count * gr->gpc_tpc_count[0]);
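        /*
         * With the single-GPC/single-TPC configuration noted above (and
         * assuming zcb_count = 1) this works out to
         * 1 * 16 * 16 * 1 / (1 * 1) = 256, i.e. one aliquot's worth of
         * pixel squares.
         */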
2723
2724         zcull->total_aliquots =
2725                 gr_gpc0_zcull_total_ram_size_num_aliquots_f(
2726                         gk20a_readl(g, gr_gpc0_zcull_total_ram_size_r()));
2727
2728         return 0;
2729 }
2730
2731 u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
2732 {
2733         /* assuming gr has already been initialized */
2734         return gr->ctx_vars.zcull_ctxsw_image_size;
2735 }
2736
2737 int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
2738                         struct channel_gk20a *c, u64 zcull_va, u32 mode)
2739 {
2740         struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx;
2741
2742         zcull_ctx->ctx_sw_mode = mode;
2743         zcull_ctx->gpu_va = zcull_va;
2744
2745         /* TBD: don't disable channel in sw method processing */
2746         return gr_gk20a_ctx_zcull_setup(g, c, true);
2747 }
2748
2749 int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
2750                         struct gr_zcull_info *zcull_params)
2751 {
2752         struct gr_zcull_gk20a *zcull = &gr->zcull;
2753
2754         zcull_params->width_align_pixels = zcull->width_align_pixels;
2755         zcull_params->height_align_pixels = zcull->height_align_pixels;
2756         zcull_params->pixel_squares_by_aliquots =
2757                 zcull->pixel_squares_by_aliquots;
2758         zcull_params->aliquot_total = zcull->total_aliquots;
2759
2760         zcull_params->region_byte_multiplier =
2761                 gr->gpc_count * gr_zcull_bytes_per_aliquot_per_gpu_v();
2762         zcull_params->region_header_size =
2763                 proj_scal_litter_num_gpcs_v() *
2764                 gr_zcull_save_restore_header_bytes_per_gpc_v();
2765
2766         zcull_params->subregion_header_size =
2767                 proj_scal_litter_num_gpcs_v() *
2768                 gr_zcull_save_restore_subregion_header_bytes_per_gpc_v();
2769
2770         zcull_params->subregion_width_align_pixels =
2771                 gr->tpc_count * gr_gpc0_zcull_zcsize_width_subregion__multiple_v();
2772         zcull_params->subregion_height_align_pixels =
2773                 gr_gpc0_zcull_zcsize_height_subregion__multiple_v();
2774         zcull_params->subregion_count = gr_zcull_subregion_qty_v();
2775
2776         return 0;
2777 }
2778
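/*
 * The two helpers below program a zero-bandwidth clear (ZBC) entry into both
 * the L2 (dstg) clear-value registers and the DS table, with gr engine
 * activity disabled and the pipe idled around the update, and then mirror the
 * new values into the driver's local tables so that gr_gk20a_query_zbc() and
 * the reference counts stay consistent with what was written to hardware.
 */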
2779 static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
2780                                 struct zbc_entry *color_val, u32 index)
2781 {
2782         struct fifo_gk20a *f = &g->fifo;
2783         struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
2784         u32 i;
2785         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
2786         u32 ret;
2787
2788         ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
2789         if (ret) {
2790                 nvhost_err(dev_from_gk20a(g),
2791                         "failed to disable gr engine activity\n");
2792                 return ret;
2793         }
2794
2795         ret = gr_gk20a_wait_idle(g, &timeout);
2796         if (ret) {
2797                 nvhost_err(dev_from_gk20a(g),
2798                         "failed to idle graphics\n");
2799                 goto clean_up;
2800         }
2801
2802         /* update l2 table */
2803         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
2804                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
2805                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
2806                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(index +
2807                                         GK20A_STARTOF_ZBC_TABLE));
2808
2809         for (i = 0; i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++)
2810                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i),
2811                         color_val->color_l2[i]);
2812
2813         /* update ds table */
2814         gk20a_writel(g, gr_ds_zbc_color_r_r(),
2815                 gr_ds_zbc_color_r_val_f(color_val->color_ds[0]));
2816         gk20a_writel(g, gr_ds_zbc_color_g_r(),
2817                 gr_ds_zbc_color_g_val_f(color_val->color_ds[1]));
2818         gk20a_writel(g, gr_ds_zbc_color_b_r(),
2819                 gr_ds_zbc_color_b_val_f(color_val->color_ds[2]));
2820         gk20a_writel(g, gr_ds_zbc_color_a_r(),
2821                 gr_ds_zbc_color_a_val_f(color_val->color_ds[3]));
2822
2823         gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
2824                 gr_ds_zbc_color_fmt_val_f(color_val->format));
2825
2826         gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
2827                 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
2828
2829         /* trigger the write */
2830         gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
2831                 gr_ds_zbc_tbl_ld_select_c_f() |
2832                 gr_ds_zbc_tbl_ld_action_write_f() |
2833                 gr_ds_zbc_tbl_ld_trigger_active_f());
2834
2835         /* update local copy */
2836         for (i = 0; i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) {
2837                 gr->zbc_col_tbl[index].color_l2[i] = color_val->color_l2[i];
2838                 gr->zbc_col_tbl[index].color_ds[i] = color_val->color_ds[i];
2839         }
2840         gr->zbc_col_tbl[index].format = color_val->format;
2841         gr->zbc_col_tbl[index].ref_cnt++;
2842
2843 clean_up:
2844         ret = gk20a_fifo_enable_engine_activity(g, gr_info);
2845         if (ret) {
2846                 nvhost_err(dev_from_gk20a(g),
2847                         "failed to enable gr engine activity\n");
2848         }
2849
2850         return ret;
2851 }
2852
2853 static int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
2854                                 struct zbc_entry *depth_val, u32 index)
2855 {
2856         struct fifo_gk20a *f = &g->fifo;
2857         struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
2858         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
2859         u32 ret;
2860
2861         ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
2862         if (ret) {
2863                 nvhost_err(dev_from_gk20a(g),
2864                         "failed to disable gr engine activity\n");
2865                 return ret;
2866         }
2867
2868         ret = gr_gk20a_wait_idle(g, &timeout);
2869         if (ret) {
2870                 nvhost_err(dev_from_gk20a(g),
2871                         "failed to idle graphics\n");
2872                 goto clean_up;
2873         }
2874
2875         /* update l2 table */
2876         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
2877                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
2878                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
2879                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(index +
2880                                         GK20A_STARTOF_ZBC_TABLE));
2881
2882         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(),
2883                         depth_val->depth);
2884
2885         /* update ds table */
2886         gk20a_writel(g, gr_ds_zbc_z_r(),
2887                 gr_ds_zbc_z_val_f(depth_val->depth));
2888
2889         gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
2890                 gr_ds_zbc_z_fmt_val_f(depth_val->format));
2891
2892         gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
2893                 gr_ds_zbc_tbl_index_val_f(index + GK20A_STARTOF_ZBC_TABLE));
2894
2895         /* trigger the write */
2896         gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
2897                 gr_ds_zbc_tbl_ld_select_z_f() |
2898                 gr_ds_zbc_tbl_ld_action_write_f() |
2899                 gr_ds_zbc_tbl_ld_trigger_active_f());
2900
2901         /* update local copy */
2902         gr->zbc_dep_tbl[index].depth = depth_val->depth;
2903         gr->zbc_dep_tbl[index].format = depth_val->format;
2904         gr->zbc_dep_tbl[index].ref_cnt++;
2905
2906 clean_up:
2907         ret = gk20a_fifo_enable_engine_activity(g, gr_info);
2908         if (ret) {
2909                 nvhost_err(dev_from_gk20a(g),
2910                         "failed to enable gr engine activity\n");
2911         }
2912
2913         return ret;
2914 }
2915
2916 int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
2917                      struct zbc_entry *zbc_val)
2918 {
2919         struct zbc_color_table *c_tbl;
2920         struct zbc_depth_table *d_tbl;
2921         u32 i, ret = -ENOMEM;
2922         bool added = false;
2923
2924         /* no endian swap ? */
2925
2926         switch (zbc_val->type) {
2927         case GK20A_ZBC_TYPE_COLOR:
2928                 /* search existing tables */
2929                 for (i = 0; i < gr->max_used_color_index; i++) {
2930
2931                         c_tbl = &gr->zbc_col_tbl[i];
2932
2933                         if (c_tbl->ref_cnt && c_tbl->format == zbc_val->format &&
2934                             memcmp(c_tbl->color_ds, zbc_val->color_ds,
2935                                 sizeof(zbc_val->color_ds)) == 0) {
2936
2937                                 if (memcmp(c_tbl->color_l2, zbc_val->color_l2,
2938                                     sizeof(zbc_val->color_l2))) {
2939                                         nvhost_err(dev_from_gk20a(g),
2940                                                 "zbc l2 and ds color don't match existing entries");
2941                                         return -EINVAL;
2942                                 }
2943                                 added = true;
2944                                 c_tbl->ref_cnt++;
2945                                 ret = 0;
2946                                 break;
2947                         }
2948                 }
2949                 /* add new table */
2950                 if (!added &&
2951                     gr->max_used_color_index < GK20A_ZBC_TABLE_SIZE) {
2952
2953                         c_tbl =
2954                             &gr->zbc_col_tbl[gr->max_used_color_index];
2955                         WARN_ON(c_tbl->ref_cnt != 0);
2956
2957                         ret = gr_gk20a_add_zbc_color(g, gr,
2958                                 zbc_val, gr->max_used_color_index);
2959
2960                         if (!ret)
2961                                 gr->max_used_color_index++;
2962                 }
2963                 break;
2964         case GK20A_ZBC_TYPE_DEPTH:
2965                 /* search existing tables */
2966                 for (i = 0; i < gr->max_used_depth_index; i++) {
2967
2968                         d_tbl = &gr->zbc_dep_tbl[i];
2969
2970                         if (d_tbl->ref_cnt &&
2971                             d_tbl->depth == zbc_val->depth &&
2972                             d_tbl->format == zbc_val->format) {
2973                                 added = true;
2974                                 d_tbl->ref_cnt++;
2975                                 ret = 0;
2976                                 break;
2977                         }
2978                 }
2979                 /* add new table */
2980                 if (!added &&
2981                     gr->max_used_depth_index < GK20A_ZBC_TABLE_SIZE) {
2982
2983                         d_tbl =
2984                             &gr->zbc_dep_tbl[gr->max_used_depth_index];
2985                         WARN_ON(d_tbl->ref_cnt != 0);
2986
2987                         ret = gr_gk20a_add_zbc_depth(g, gr,
2988                                 zbc_val, gr->max_used_depth_index);
2989
2990                         if (!ret)
2991                                 gr->max_used_depth_index++;
2992                 }
2993                 break;
2994         default:
2995                 nvhost_err(dev_from_gk20a(g),
2996                         "invalid zbc table type %d", zbc_val->type);
2997                 return -EINVAL;
2998         }
2999
3000         if (added && ret == 0) {
3001                 /* update zbc for elpg */
3002         }
3003
3004         return ret;
3005 }
3006
3007 int gr_gk20a_clear_zbc_table(struct gk20a *g, struct gr_gk20a *gr)
3008 {
3009         struct fifo_gk20a *f = &g->fifo;
3010         struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
3011         u32 i, j;
3012         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
3013         u32 ret;
3014
3015         ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
3016         if (ret) {
3017                 nvhost_err(dev_from_gk20a(g),
3018                         "failed to disable gr engine activity\n");
3019                 return ret;
3020         }
3021
3022         ret = gr_gk20a_wait_idle(g, &timeout);
3023         if (ret) {
3024                 nvhost_err(dev_from_gk20a(g),
3025                         "failed to idle graphics\n");
3026                 goto clean_up;
3027         }
3028
3029         for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
3030                 gr->zbc_col_tbl[i].format = 0;
3031                 gr->zbc_col_tbl[i].ref_cnt = 0;
3032
3033                 gk20a_writel(g, gr_ds_zbc_color_fmt_r(),
3034                         gr_ds_zbc_color_fmt_val_invalid_f());
3035                 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3036                         gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
3037
3038                 /* trigger the write */
3039                 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3040                         gr_ds_zbc_tbl_ld_select_c_f() |
3041                         gr_ds_zbc_tbl_ld_action_write_f() |
3042                         gr_ds_zbc_tbl_ld_trigger_active_f());
3043
3044                 /* clear l2 table */
3045                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
3046                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
3047                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
3048                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(i +
3049                                         GK20A_STARTOF_ZBC_TABLE));
3050
3051                 for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++) {
3052                         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0);
3053                         gr->zbc_col_tbl[i].color_l2[j] = 0;
3054                         gr->zbc_col_tbl[i].color_ds[j] = 0;
3055                 }
3056         }
3057         gr->max_used_color_index = 0;
3058         gr->max_default_color_index = 0;
3059
3060         for (i = 0; i < GK20A_ZBC_TABLE_SIZE; i++) {
3061                 gr->zbc_dep_tbl[i].depth = 0;
3062                 gr->zbc_dep_tbl[i].format = 0;
3063                 gr->zbc_dep_tbl[i].ref_cnt = 0;
3064
3065                 gk20a_writel(g, gr_ds_zbc_z_fmt_r(),
3066                         gr_ds_zbc_z_fmt_val_invalid_f());
3067                 gk20a_writel(g, gr_ds_zbc_tbl_index_r(),
3068                         gr_ds_zbc_tbl_index_val_f(i + GK20A_STARTOF_ZBC_TABLE));
3069
3070                 /* trigger the write */
3071                 gk20a_writel(g, gr_ds_zbc_tbl_ld_r(),
3072                         gr_ds_zbc_tbl_ld_select_z_f() |
3073                         gr_ds_zbc_tbl_ld_action_write_f() |
3074                         gr_ds_zbc_tbl_ld_trigger_active_f());
3075
3076                 /* clear l2 table */
3077                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
3078                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
3079                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
3080                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(i +
3081                                         GK20A_STARTOF_ZBC_TABLE));
3082
3083                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
3084         }
3085         gr->max_used_depth_index = 0;
3086         gr->max_default_depth_index = 0;
3087
3088 clean_up:
3089         ret = gk20a_fifo_enable_engine_activity(g, gr_info);
3090         if (ret) {
3091                 nvhost_err(dev_from_gk20a(g),
3092                         "failed to enable gr engine activity\n");
3093         }
3094
3095         /* elpg stuff */
3096
3097         return ret;
3098 }
3099
3100 /* get a zbc table entry specified by index
3101  * return table size when type is invalid */
3102 int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
3103                         struct zbc_query_params *query_params)
3104 {
3105         u32 index = query_params->index_size;
3106         u32 i;
3107
3108         switch (query_params->type) {
3109         case GK20A_ZBC_TYPE_INVALID:
3110                 query_params->index_size = GK20A_ZBC_TABLE_SIZE;
3111                 break;
3112         case GK20A_ZBC_TYPE_COLOR:
3113                 if (index >= GK20A_ZBC_TABLE_SIZE) {
3114                         nvhost_err(dev_from_gk20a(g),
3115                                 "invalid zbc color table index\n");
3116                         return -EINVAL;
3117                 }
3118                 for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3119                         query_params->color_l2[i] =
3120                                 gr->zbc_col_tbl[index].color_l2[i];
3121                         query_params->color_ds[i] =
3122                                 gr->zbc_col_tbl[index].color_ds[i];
3123                 }
3124                 query_params->format = gr->zbc_col_tbl[index].format;
3125                 query_params->ref_cnt = gr->zbc_col_tbl[index].ref_cnt;
3126                 break;
3127         case GK20A_ZBC_TYPE_DEPTH:
3128                 if (index >= GK20A_ZBC_TABLE_SIZE) {
3129                         nvhost_err(dev_from_gk20a(g),
3130                                 "invalid zbc depth table index\n");
3131                         return -EINVAL;
3132                 }
3133                 query_params->depth = gr->zbc_dep_tbl[index].depth;
3134                 query_params->format = gr->zbc_dep_tbl[index].format;
3135                 query_params->ref_cnt = gr->zbc_dep_tbl[index].ref_cnt;
3136                 break;
3137         default:
3138                 nvhost_err(dev_from_gk20a(g),
3139                                 "invalid zbc table type\n");
3140                 return -EINVAL;
3141         }
3142
3143         return 0;
3144 }
3145
3146 static int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
3147 {
3148         struct zbc_entry zbc_val;
3149         u32 i, err;
3150
3151         /* load default color table */
3152         zbc_val.type = GK20A_ZBC_TYPE_COLOR;
3153
3154         zbc_val.format = gr_ds_zbc_color_fmt_val_zero_v();
3155         for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3156                 zbc_val.color_ds[i] = 0;
3157                 zbc_val.color_l2[i] = 0;
3158         }
3159         err = gr_gk20a_add_zbc(g, gr, &zbc_val);
3160
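        /* opaque white: 0x3f800000 is IEEE-754 1.0f for the L2 values, and
         * 0xffffffff presumably encodes 1.0 in the unorm DS format */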
3161         zbc_val.format = gr_ds_zbc_color_fmt_val_unorm_one_v();
3162         for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3163                 zbc_val.color_ds[i] = 0xffffffff;
3164                 zbc_val.color_l2[i] = 0x3f800000;
3165         }
3166         err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3167
3168         zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
3169         for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3170                 zbc_val.color_ds[i] = 0;
3171                 zbc_val.color_l2[i] = 0;
3172         }
3173         err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3174
3175         zbc_val.format = gr_ds_zbc_color_fmt_val_rf32_gf32_bf32_af32_v();
3176         for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
3177                 zbc_val.color_ds[i] = 0x3f800000;
3178                 zbc_val.color_l2[i] = 0x3f800000;
3179         }
3180         err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3181
3182         if (!err)
3183                 gr->max_default_color_index = 4;
3184         else {
3185                 nvhost_err(dev_from_gk20a(g),
3186                            "failed to load default zbc color table\n");
3187                 return err;
3188         }
3189
3190         /* load default depth table */
3191         zbc_val.type = GK20A_ZBC_TYPE_DEPTH;
3192
3193         zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
3194         zbc_val.depth = 0;
3195         err = gr_gk20a_add_zbc(g, gr, &zbc_val);
3196
3197         zbc_val.format = gr_ds_zbc_z_fmt_val_fp32_v();
3198         zbc_val.depth = 0x3f800000;
3199         err |= gr_gk20a_add_zbc(g, gr, &zbc_val);
3200
3201         if (!err)
3202                 gr->max_default_depth_index = 2;
3203         else {
3204                 nvhost_err(dev_from_gk20a(g),
3205                            "failed to load default zbc depth table\n");
3206                 return err;
3207         }
3208
3209         return 0;
3210 }
3211
3212 static int gr_gk20a_init_zbc(struct gk20a *g, struct gr_gk20a *gr)
3213 {
3214         u32 i, j;
3215
3216         /* reset zbc clear */
3217         for (i = 0; i < GK20A_SIZEOF_ZBC_TABLE -
3218             GK20A_STARTOF_ZBC_TABLE; i++) {
3219                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(),
3220                         (gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()) &
3221                          ~ltc_ltcs_ltss_dstg_zbc_index_address_f(~0)) |
3222                                 ltc_ltcs_ltss_dstg_zbc_index_address_f(
3223                                         i + GK20A_STARTOF_ZBC_TABLE));
3224                 for (j = 0; j < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); j++)
3225                         gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(j), 0);
3226                 gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), 0);
3227         }
3228
3229         gr_gk20a_clear_zbc_table(g, gr);
3230
3231         gr_gk20a_load_zbc_default_table(g, gr);
3232
3233         return 0;
3234 }
3235
3236 static void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine)
3237 {
3238         u32 gate_ctrl, idle_filter;
3239
3240         gate_ctrl = gk20a_readl(g, therm_gate_ctrl_r(engine));
3241
3242         switch (mode) {
3243         case ELCG_RUN:
3244                 gate_ctrl = set_field(gate_ctrl,
3245                                 therm_gate_ctrl_eng_clk_m(),
3246                                 therm_gate_ctrl_eng_clk_run_f());
3247                 gate_ctrl = set_field(gate_ctrl,
3248                                 therm_gate_ctrl_eng_pwr_m(),
3249                                 /* set elpg to auto to meet hw expectation */
3250                                 therm_gate_ctrl_eng_pwr_auto_f());
3251                 break;
3252         case ELCG_STOP:
3253                 gate_ctrl = set_field(gate_ctrl,
3254                                 therm_gate_ctrl_eng_clk_m(),
3255                                 therm_gate_ctrl_eng_clk_stop_f());
3256                 break;
3257         case ELCG_AUTO:
3258                 gate_ctrl = set_field(gate_ctrl,
3259                                 therm_gate_ctrl_eng_clk_m(),
3260                                 therm_gate_ctrl_eng_clk_auto_f());
3261                 break;
3262         default:
3263                 nvhost_err(dev_from_gk20a(g),
3264                         "invalid elcg mode %d", mode);
3265         }
3266
3267         if (tegra_revision == TEGRA_REVISION_SIM) {
3268                 gate_ctrl = set_field(gate_ctrl,
3269                         therm_gate_ctrl_eng_delay_after_m(),
3270                         therm_gate_ctrl_eng_delay_after_f(4));
3271         }
3272
3273         /* 2 * (1 << 5) = 64 clks */
3274         gate_ctrl = set_field(gate_ctrl,
3275                 therm_gate_ctrl_eng_idle_filt_exp_m(),
3276                 therm_gate_ctrl_eng_idle_filt_exp_f(5));
3277         gate_ctrl = set_field(gate_ctrl,
3278                 therm_gate_ctrl_eng_idle_filt_mant_m(),
3279                 therm_gate_ctrl_eng_idle_filt_mant_f(2));
3280         gk20a_writel(g, therm_gate_ctrl_r(engine), gate_ctrl);
3281
3282         /* default fecs_idle_filter to 0 */
3283         idle_filter = gk20a_readl(g, therm_fecs_idle_filter_r());
3284         idle_filter &= ~therm_fecs_idle_filter_value_m();
3285         gk20a_writel(g, therm_fecs_idle_filter_r(), idle_filter);
3286         /* default hubmmu_idle_filter to 0 */
3287         idle_filter = gk20a_readl(g, therm_hubmmu_idle_filter_r());
3288         idle_filter &= ~therm_hubmmu_idle_filter_value_m();
3289         gk20a_writel(g, therm_hubmmu_idle_filter_r(), idle_filter);
3290 }
3291
3292 static void gr_gk20a_load_gating_prod(struct gk20a *g,
3293                 const struct gating_desc *desc, u32 size, bool prod)
3294 {
3295         u32 i;
3296         for (i = 0; i < size; i++) {
3297                 if (prod)
3298                         gk20a_writel(g, desc[i].addr, desc[i].prod);
3299                 else
3300                         gk20a_writel(g, desc[i].addr, desc[i].disable);
3301         }
3302 }
3303
3304 static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
3305 {
3306         u32 gpc_index, gpc_tpc_count, gpc_zcull_count;
3307         u32 *zcull_map_tiles, *zcull_bank_counters;
3308         u32 map_counter;
3309         u32 rcp_conserv;
3310         u32 offset;
3311         bool floorsweep = false;
3312
3313         if (!gr->map_tiles)
3314                 return -1;
3315
3316         zcull_map_tiles = kzalloc(proj_scal_max_gpcs_v() *
3317                         proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
3318         zcull_bank_counters = kzalloc(proj_scal_max_gpcs_v() *
3319                         proj_scal_max_tpc_per_gpc_v() * sizeof(u32), GFP_KERNEL);
3320
3321         if (!zcull_map_tiles || !zcull_bank_counters) {
3322                 nvhost_err(dev_from_gk20a(g),
3323                         "failed to allocate zcull temp buffers");
3324                 return -ENOMEM;
3325         }
3326
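        /*
         * zcull_bank_counters[gpc] counts how many tiles have already been
         * assigned to that GPC, so each zcull_map_tiles[] entry becomes the
         * tile's ordinal (SM index) within its own GPC; e.g. map_tiles =
         * {0, 1, 0} yields zcull_map_tiles = {0, 0, 1}.
         */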
3327         for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
3328                 zcull_map_tiles[map_counter] =
3329                         zcull_bank_counters[gr->map_tiles[map_counter]];
3330                 zcull_bank_counters[gr->map_tiles[map_counter]]++;
3331         }
3332
3333         gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map0_r(),
3334                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_0_f(zcull_map_tiles[0]) |
3335                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_1_f(zcull_map_tiles[1]) |
3336                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_2_f(zcull_map_tiles[2]) |
3337                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_3_f(zcull_map_tiles[3]) |
3338                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_4_f(zcull_map_tiles[4]) |
3339                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_5_f(zcull_map_tiles[5]) |
3340                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_6_f(zcull_map_tiles[6]) |
3341                 gr_gpcs_zcull_sm_in_gpc_number_map0_tile_7_f(zcull_map_tiles[7]));
3342
3343         gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map1_r(),
3344                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_8_f(zcull_map_tiles[8]) |
3345                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_9_f(zcull_map_tiles[9]) |
3346                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_10_f(zcull_map_tiles[10]) |
3347                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_11_f(zcull_map_tiles[11]) |
3348                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_12_f(zcull_map_tiles[12]) |
3349                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_13_f(zcull_map_tiles[13]) |
3350                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_14_f(zcull_map_tiles[14]) |
3351                 gr_gpcs_zcull_sm_in_gpc_number_map1_tile_15_f(zcull_map_tiles[15]));
3352
3353         gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map2_r(),
3354                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_16_f(zcull_map_tiles[16]) |
3355                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_17_f(zcull_map_tiles[17]) |
3356                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_18_f(zcull_map_tiles[18]) |
3357                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_19_f(zcull_map_tiles[19]) |
3358                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_20_f(zcull_map_tiles[20]) |
3359                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_21_f(zcull_map_tiles[21]) |
3360                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_22_f(zcull_map_tiles[22]) |
3361                 gr_gpcs_zcull_sm_in_gpc_number_map2_tile_23_f(zcull_map_tiles[23]));
3362
3363         gk20a_writel(g, gr_gpcs_zcull_sm_in_gpc_number_map3_r(),
3364                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_24_f(zcull_map_tiles[24]) |
3365                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_25_f(zcull_map_tiles[25]) |
3366                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_26_f(zcull_map_tiles[26]) |
3367                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_27_f(zcull_map_tiles[27]) |
3368                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_28_f(zcull_map_tiles[28]) |
3369                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_29_f(zcull_map_tiles[29]) |
3370                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_30_f(zcull_map_tiles[30]) |
3371                 gr_gpcs_zcull_sm_in_gpc_number_map3_tile_31_f(zcull_map_tiles[31]));
3372
3373         kfree(zcull_map_tiles);
3374         kfree(zcull_bank_counters);
3375
3376         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3377                 gpc_tpc_count = gr->gpc_tpc_count[gpc_index];
3378                 gpc_zcull_count = gr->gpc_zcb_count[gpc_index];
3379
3380                 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
3381                     gpc_zcull_count < gpc_tpc_count) {
3382                         nvhost_err(dev_from_gk20a(g),
3383                                 "zcull_banks (%d) less than tpcs (%d) for gpc (%d)",
3384                                 gpc_zcull_count, gpc_tpc_count, gpc_index);
3385                         return -EINVAL;
3386                 }
3387                 if (gpc_zcull_count != gr->max_zcull_per_gpc_count &&
3388                     gpc_zcull_count != 0)
3389                         floorsweep = true;
3390         }
3391
3392         /* 1.0f / 1.0f * gr_gpc0_zcull_sm_num_rcp_conservative__max_v() */
3393         rcp_conserv = gr_gpc0_zcull_sm_num_rcp_conservative__max_v();
3394
3395         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3396                 offset = gpc_index * proj_gpc_stride_v();
3397
3398                 if (floorsweep) {
3399                         gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
3400                                 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
3401                                 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
3402                                         gr->max_zcull_per_gpc_count));
3403                 } else {
3404                         gk20a_writel(g, gr_gpc0_zcull_ram_addr_r() + offset,
3405                                 gr_gpc0_zcull_ram_addr_row_offset_f(gr->map_row_offset) |
3406                                 gr_gpc0_zcull_ram_addr_tiles_per_hypertile_row_per_gpc_f(
3407                                         gr->gpc_tpc_count[gpc_index]));
3408                 }
3409
3410                 gk20a_writel(g, gr_gpc0_zcull_fs_r() + offset,
3411                         gr_gpc0_zcull_fs_num_active_banks_f(gr->gpc_zcb_count[gpc_index]) |
3412                         gr_gpc0_zcull_fs_num_sms_f(gr->tpc_count));
3413
3414                 gk20a_writel(g, gr_gpc0_zcull_sm_num_rcp_r() + offset,
3415                         gr_gpc0_zcull_sm_num_rcp_conservative_f(rcp_conserv));
3416         }
3417
3418         gk20a_writel(g, gr_gpcs_ppcs_wwdx_sm_num_rcp_r(),
3419                 gr_gpcs_ppcs_wwdx_sm_num_rcp_conservative_f(rcp_conserv));
3420
3421         return 0;
3422 }
3423
3424 static int gk20a_init_gr_setup_hw(struct gk20a *g)
3425 {
3426         struct gr_gk20a *gr = &g->gr;
3427         struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
3428         struct av_list_gk20a *sw_bundle_init = &g->gr.ctx_vars.sw_bundle_init;
3429         struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
3430         u32 data;
3431         u32 addr_lo, addr_hi, addr;
3432         u32 compbit_base_post_divide;
3433         u32 compbit_base_post_multiply;
3434         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
3435         u32 fe_go_idle_timeout_save;
3436         u32 last_bundle_data = 0;
3437         u32 last_method_data = 0;
3438         u32 i, err;
3439
3440         nvhost_dbg_fn("");
3441
3442         /* slcg prod values */
3443         gr_gk20a_load_gating_prod(g, gk20a_slcg_gr,
3444                 sizeof(gk20a_slcg_gr)/sizeof(struct gating_desc), true);
3445         gr_gk20a_load_gating_prod(g, gk20a_slcg_perf,
3446                 sizeof(gk20a_slcg_perf)/sizeof(struct gating_desc), true);
3447
3448         /* init mmu debug buffer */
3449         addr_lo = u64_lo32(gr->mmu_wr_mem.cpu_pa);
3450         addr_hi = u64_hi32(gr->mmu_wr_mem.cpu_pa);
3451         addr = (addr_lo >> fb_mmu_debug_wr_addr_alignment_v()) |
3452                 (addr_hi << (32 - fb_mmu_debug_wr_addr_alignment_v()));
3453
3454         gk20a_writel(g, fb_mmu_debug_wr_r(),
3455                      fb_mmu_debug_wr_aperture_vid_mem_f() |
3456                      fb_mmu_debug_wr_vol_false_f() |
3457                      fb_mmu_debug_wr_addr_v(addr));
3458
3459         addr_lo = u64_lo32(gr->mmu_rd_mem.cpu_pa);
3460         addr_hi = u64_hi32(gr->mmu_rd_mem.cpu_pa);
3461         addr = (addr_lo >> fb_mmu_debug_rd_addr_alignment_v()) |
3462                 (addr_hi << (32 - fb_mmu_debug_rd_addr_alignment_v()));
3463
3464         gk20a_writel(g, fb_mmu_debug_rd_r(),
3465                      fb_mmu_debug_rd_aperture_vid_mem_f() |
3466                      fb_mmu_debug_rd_vol_false_f() |
3467                      fb_mmu_debug_rd_addr_v(addr));
3468
3469         /* load gr floorsweeping registers */
3470         data = gk20a_readl(g, gr_gpc0_ppc0_pes_vsc_strem_r());
3471         data = set_field(data, gr_gpc0_ppc0_pes_vsc_strem_master_pe_m(),
3472                         gr_gpc0_ppc0_pes_vsc_strem_master_pe_true_f());
3473         gk20a_writel(g, gr_gpc0_ppc0_pes_vsc_strem_r(), data);
3474
3475         gr_gk20a_zcull_init_hw(g, gr);
3476
3477         gr_gk20a_load_gating_prod(g, gk20a_blcg_gr,
3478                 sizeof(gk20a_blcg_gr)/sizeof(struct gating_desc), true);
3479         gr_gk20a_load_gating_prod(g, gk20a_pg_gr,
3480                 sizeof(gk20a_pg_gr)/sizeof(struct gating_desc), true);
3481
3482         gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_GR_GK20A);
3483         gr_gk20a_init_elcg_mode(g, ELCG_AUTO, ENGINE_CE2_GK20A);
3484
3485         /* enable fifo access */
3486         gk20a_writel(g, gr_gpfifo_ctl_r(),
3487                      gr_gpfifo_ctl_access_enabled_f() |
3488                      gr_gpfifo_ctl_semaphore_access_enabled_f());
3489
3490         /* TBD: reload gr ucode when needed */
3491
3492         /* enable interrupts */
3493         gk20a_writel(g, gr_intr_r(), 0xFFFFFFFF);
3494         gk20a_writel(g, gr_intr_en_r(), 0xFFFFFFFF);
3495
3496         /* enable fecs error interrupts */
3497         gk20a_writel(g, gr_fecs_host_int_enable_r(),
3498                      gr_fecs_host_int_enable_fault_during_ctxsw_enable_f() |
3499                      gr_fecs_host_int_enable_umimp_firmware_method_enable_f() |
3500                      gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
3501                      gr_fecs_host_int_enable_watchdog_enable_f());
3502
3503         /* enable exceptions */
3504         gk20a_writel(g, gr_fe_hww_esr_r(),
3505                      gr_fe_hww_esr_en_enable_f() |
3506                      gr_fe_hww_esr_reset_active_f());
3507         gk20a_writel(g, gr_memfmt_hww_esr_r(),
3508                      gr_memfmt_hww_esr_en_enable_f() |
3509                      gr_memfmt_hww_esr_reset_active_f());
3510         gk20a_writel(g, gr_scc_hww_esr_r(),
3511                      gr_scc_hww_esr_en_enable_f() |
3512                      gr_scc_hww_esr_reset_active_f());
3513         gk20a_writel(g, gr_mme_hww_esr_r(),
3514                      gr_mme_hww_esr_en_enable_f() |
3515                      gr_mme_hww_esr_reset_active_f());
3516         gk20a_writel(g, gr_pd_hww_esr_r(),
3517                      gr_pd_hww_esr_en_enable_f() |
3518                      gr_pd_hww_esr_reset_active_f());
3519         gk20a_writel(g, gr_sked_hww_esr_r(), /* enabled by default */
3520                      gr_sked_hww_esr_reset_active_f());
3521         gk20a_writel(g, gr_ds_hww_esr_r(),
3522                      gr_ds_hww_esr_en_enabled_f() |
3523                      gr_ds_hww_esr_reset_task_f());
3524         gk20a_writel(g, gr_ds_hww_report_mask_r(),
3525                      gr_ds_hww_report_mask_sph0_err_report_f() |
3526                      gr_ds_hww_report_mask_sph1_err_report_f() |
3527                      gr_ds_hww_report_mask_sph2_err_report_f() |
3528                      gr_ds_hww_report_mask_sph3_err_report_f() |
3529                      gr_ds_hww_report_mask_sph4_err_report_f() |
3530                      gr_ds_hww_report_mask_sph5_err_report_f() |
3531                      gr_ds_hww_report_mask_sph6_err_report_f() |
3532                      gr_ds_hww_report_mask_sph7_err_report_f() |
3533                      gr_ds_hww_report_mask_sph8_err_report_f() |
3534                      gr_ds_hww_report_mask_sph9_err_report_f() |
3535                      gr_ds_hww_report_mask_sph10_err_report_f() |
3536                      gr_ds_hww_report_mask_sph11_err_report_f() |
3537                      gr_ds_hww_report_mask_sph12_err_report_f() |
3538                      gr_ds_hww_report_mask_sph13_err_report_f() |
3539                      gr_ds_hww_report_mask_sph14_err_report_f() |
3540                      gr_ds_hww_report_mask_sph15_err_report_f() |
3541                      gr_ds_hww_report_mask_sph16_err_report_f() |
3542                      gr_ds_hww_report_mask_sph17_err_report_f() |
3543                      gr_ds_hww_report_mask_sph18_err_report_f() |
3544                      gr_ds_hww_report_mask_sph19_err_report_f() |
3545                      gr_ds_hww_report_mask_sph20_err_report_f() |
3546                      gr_ds_hww_report_mask_sph21_err_report_f() |
3547                      gr_ds_hww_report_mask_sph22_err_report_f() |
3548                      gr_ds_hww_report_mask_sph23_err_report_f());
3549
3550         /* TBD: ECC for L1/SM */
3551         /* TBD: enable per GPC exceptions */
3552         /* TBD: enable per BE exceptions */
3553
3554         /* reset and enable all exceptions */
3555         gk20a_writel(g, gr_exception_r(), 0xFFFFFFFF);
3556         gk20a_writel(g, gr_exception_en_r(), 0xFFFFFFFF);
3557         gk20a_writel(g, gr_exception1_r(), 0xFFFFFFFF);
3558         gk20a_writel(g, gr_exception1_en_r(), 0xFFFFFFFF);
3559         gk20a_writel(g, gr_exception2_r(), 0xFFFFFFFF);
3560         gk20a_writel(g, gr_exception2_en_r(), 0xFFFFFFFF);
3561
3562         /* ignore status from some units */
3563         data = gk20a_readl(g, gr_status_mask_r());
3564         gk20a_writel(g, gr_status_mask_r(), data & gr->status_disable_mask);
3565
3566         gr_gk20a_init_zbc(g, gr);
3567
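        /*
         * The CBC base register takes the backing store physical address
         * shifted down by the alignment amount and divided across the FBPs;
         * the multiply-back check below bumps the programmed value by one
         * whenever the integer division truncated, so the base never lands
         * below the actual allocation.
         */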
3568         compbit_base_post_divide = u64_lo32(
3569                 (gr->compbit_store.base_pa >>
3570                         ltc_ltc0_lts0_cbc_base_alignment_shift_v()) /
3571                         gr->num_fbps);
3572
3573         compbit_base_post_multiply = ((u64)compbit_base_post_divide *
3574                 gr->num_fbps) << ltc_ltc0_lts0_cbc_base_alignment_shift_v();
3575
3576         if (compbit_base_post_multiply < gr->compbit_store.base_pa)
3577                 compbit_base_post_divide++;
3578
3579         gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(),
3580                 compbit_base_post_divide);
3581
3582         /* load ctx init */
3583         for (i = 0; i < sw_ctx_load->count; i++)
3584                 gk20a_writel(g, sw_ctx_load->l[i].addr,
3585                              sw_ctx_load->l[i].value);
3586
3587         /* TBD: add gr ctx overrides */
3588
3589         err = gr_gk20a_wait_idle(g, &timeout);
3590         if (err)
3591                 goto out;
3592
3593         /* save and disable fe_go_idle */
3594         fe_go_idle_timeout_save =
3595                 gk20a_readl(g, gr_fe_go_idle_timeout_r());
3596         gk20a_writel(g, gr_fe_go_idle_timeout_r(),
3597                 (fe_go_idle_timeout_save & gr_fe_go_idle_timeout_count_f(0)) |
3598                 gr_fe_go_idle_timeout_count_disabled_f());
3599
3600         /* override a few ctx state registers */
3601         gr_gk20a_commit_global_cb_manager(g, NULL, 0);
3602         gr_gk20a_commit_global_timeslice(g, NULL, 0);
3603
3604         /* floorsweep anything left */
3605         gr_gk20a_ctx_state_floorsweep(g);
3606
3607         err = gr_gk20a_wait_idle(g, &timeout);
3608         if (err)
3609                 goto restore_fe_go_idle;
3610
3611         /* enable pipe mode override */
3612         gk20a_writel(g, gr_pipe_bundle_config_r(),
3613                 gr_pipe_bundle_config_override_pipe_mode_enabled_f());
3614
3615         /* load bundle init */
3616         err = 0;
3617         for (i = 0; i < sw_bundle_init->count; i++) {
3618
3619                 if (i == 0 || last_bundle_data != sw_bundle_init->l[i].value) {
3620                         gk20a_writel(g, gr_pipe_bundle_data_r(),
3621                                 sw_bundle_init->l[i].value);
3622                         last_bundle_data = sw_bundle_init->l[i].value;
3623                 }
3624
3625                 gk20a_writel(g, gr_pipe_bundle_address_r(),
3626                              sw_bundle_init->l[i].addr);
3627
3628                 if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
3629                     GR_GO_IDLE_BUNDLE)
3630                         err |= gr_gk20a_wait_idle(g, &timeout);
3631                 else if (0) { /* IS_SILICON */
3632                         do {
3633                                 u32 gr_status = gk20a_readl(g, gr_status_r());
3634                                 u32 check = min_t(u32, GR_IDLE_CHECK_PERIOD,
3635                                                   timeout);
3636
3637                                 if (gr_status_fe_method_lower_v(gr_status) ==
3638                                     gr_status_fe_method_lower_idle_v())
3639                                         break;
3640
3641                                 udelay(GR_IDLE_CHECK_PERIOD);
3642
3643                                 timeout -= check;
3644                         } while (timeout);
3645                 }
3646         }
3647
3648         /* disable pipe mode override */
3649         gk20a_writel(g, gr_pipe_bundle_config_r(),
3650                      gr_pipe_bundle_config_override_pipe_mode_disabled_f());
3651
3652 restore_fe_go_idle:
3653         /* restore fe_go_idle */
3654         gk20a_writel(g, gr_fe_go_idle_timeout_r(), fe_go_idle_timeout_save);
3655
3656         if (err || gr_gk20a_wait_idle(g, &timeout))
3657                 goto out;
3658
3659         /* load method init */
3660         if (sw_method_init->count) {
3661                 gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
3662                              sw_method_init->l[0].value);
3663                 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
3664                              gr_pri_mme_shadow_raw_index_write_trigger_f() |
3665                              sw_method_init->l[0].addr);
3666                 last_method_data = sw_method_init->l[0].value;
3667         }
3668         for (i = 1; i < sw_method_init->count; i++) {
3669                 if (sw_method_init->l[i].value != last_method_data) {
3670                         gk20a_writel(g, gr_pri_mme_shadow_raw_data_r(),
3671                                 sw_method_init->l[i].value);
3672                         last_method_data = sw_method_init->l[i].value;
3673                 }
3674                 gk20a_writel(g, gr_pri_mme_shadow_raw_index_r(),
3675                         gr_pri_mme_shadow_raw_index_write_trigger_f() |
3676                         sw_method_init->l[i].addr);
3677         }
3678
3679         err = gr_gk20a_wait_idle(g, &timeout);
3680         if (err)
3681                 goto out;
3682
3683 out:
3684         nvhost_dbg_fn("done");
3685         return err;
3686 }
3687
3688 static int gk20a_init_gr_prepare(struct gk20a *g)
3689 {
3690         u32 gpfifo_ctrl, pmc_en;
3691         u32 err = 0;
3692
3693         /* disable fifo access */
3694         gpfifo_ctrl = gk20a_readl(g, gr_gpfifo_ctl_r());
3695         gpfifo_ctrl &= ~gr_gpfifo_ctl_access_enabled_f();
3696         gk20a_writel(g, gr_gpfifo_ctl_r(), gpfifo_ctrl);
3697
3698         /* reset gr engine */
3699         pmc_en = gk20a_readl(g, mc_enable_r());
3700         pmc_en &= ~mc_enable_pgraph_enabled_f();
3701         pmc_en &= ~mc_enable_blg_enabled_f();
3702         pmc_en &= ~mc_enable_perfmon_enabled_f();
3703         gk20a_writel(g, mc_enable_r(), pmc_en);
3704
3705         pmc_en = gk20a_readl(g, mc_enable_r());
3706         pmc_en |= mc_enable_pgraph_enabled_f();
3707         pmc_en |= mc_enable_blg_enabled_f();
3708         pmc_en |= mc_enable_perfmon_enabled_f();
3709         gk20a_writel(g, mc_enable_r(), pmc_en);
3710         pmc_en = gk20a_readl(g, mc_enable_r());
3711
3712         /* enable fifo access */
3713         gk20a_writel(g, gr_gpfifo_ctl_r(),
3714                 gr_gpfifo_ctl_access_enabled_f() |
3715                 gr_gpfifo_ctl_semaphore_access_enabled_f());
3716
3717         if (!g->gr.ctx_vars.valid) {
3718                 err = gr_gk20a_init_ctx_vars(g, &g->gr);
3719                 if (err)
3720                         nvhost_err(dev_from_gk20a(g),
3721                                 "failed to load gr init ctx");
3722         }
3723
3724         return err;
3725 }
3726
3727 static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
3728 {
3729         struct gr_gk20a *gr = &g->gr;
3730         struct av_list_gk20a *sw_non_ctx_load = &g->gr.ctx_vars.sw_non_ctx_load;
3731         u32 timeout = GR_IDLE_TIMEOUT_DEFAULT;
3732         u32 i, err = 0;
3733
3734         nvhost_dbg_fn("");
3735
3736         /* enable interrupts */
3737         gk20a_writel(g, gr_intr_r(), ~0);
3738         gk20a_writel(g, gr_intr_en_r(), ~0);
3739
3740         /* reset ctx switch state */
3741         gr_gk20a_ctx_reset(g, 0);
3742
3743         /* clear scc ram */
3744         gk20a_writel(g, gr_scc_init_r(),
3745                 gr_scc_init_ram_trigger_f());
3746
3747         /* load non_ctx init */
3748         for (i = 0; i < sw_non_ctx_load->count; i++)
3749                 gk20a_writel(g, sw_non_ctx_load->l[i].addr,
3750                         sw_non_ctx_load->l[i].value);
3751
3752         err = gr_gk20a_wait_idle(g, &timeout);
3753         if (err)
3754                 goto out;
3755
3756         err = gr_gk20a_load_ctxsw_ucode(g, gr);
3757         if (err)
3758                 goto out;
3759
3760 out:
3761         if (err)
3762                 nvhost_dbg(dbg_fn | dbg_err, "fail");
3763         else
3764                 nvhost_dbg_fn("done");
3765
3766         return err;
3767 }
3768
3769 static int gk20a_init_gr_setup_sw(struct gk20a *g, bool reinit)
3770 {
3771         struct gr_gk20a *gr = &g->gr;
3772         int err;
3773
3774         nvhost_dbg_fn("");
3775
3776         if (reinit) {
3777                 nvhost_dbg_fn("skip init");
3778                 return 0;
3779         }
3780
3781         gr->g = g;
3782
3783         err = gr_gk20a_init_gr_config(g, gr);
3784         if (err)
3785                 goto clean_up;
3786
3787         err = gr_gk20a_init_mmu_sw(g, gr);
3788         if (err)
3789                 goto clean_up;
3790
3791         err = gr_gk20a_init_map_tiles(g, gr);
3792         if (err)
3793                 goto clean_up;
3794
3795 #if CONFIG_GK20A_SIM
3796         gr->max_comptag_mem = 1; /* MBs worth of comptag coverage */
3797 #else
3798         nvhost_dbg_info("total ram pages : %lu", totalram_pages);
3799         gr->max_comptag_mem = totalram_pages >> (10 - (PAGE_SHIFT - 10));
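        /* pages -> MB: >> (10 - (PAGE_SHIFT - 10)) == >> (20 - PAGE_SHIFT),
         * e.g. 256 pages per MB with 4 KB pages (PAGE_SHIFT = 12) */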
3800 #endif
3801         err = gr_gk20a_init_comptag(g, gr);
3802         if (err)
3803                 goto clean_up;
3804
3805         err = gr_gk20a_init_zcull(g, gr);
3806         if (err)
3807                 goto clean_up;
3808
3809         err = gr_gk20a_init_ctx_state(g, gr);
3810         if (err)
3811                 goto clean_up;
3812
3813         err = gr_gk20a_alloc_global_ctx_buffers(g);
3814         if (err)
3815                 goto clean_up;
3816
3817         gr->remove_support = gk20a_remove_gr_support;
3818         nvhost_dbg_fn("done");
3819         return 0;
3820
3821 clean_up:
3822         nvhost_dbg(dbg_fn | dbg_err, "fail");
3823         gk20a_remove_gr_support(g, gr);
3824         return err;
3825 }
3826
3827 int gk20a_init_gr_support(struct gk20a *g, bool reinit)
3828 {
3829         struct gr_gk20a *gr = &g->gr;
3830         u32 err;
3831
3832         if (gr->initialized)
3833                 return 0;
3834
3835         err = gk20a_init_gr_prepare(g);
3836         if (err)
3837                 return err;
3838
3839         err = gk20a_init_gr_reset_enable_hw(g);
3840         if (err)
3841                 return err;
3842
3843         err = gk20a_init_gr_setup_sw(g, false);
3844         if (err)
3845                 return err;
3846
3847         err = gk20a_init_gr_setup_hw(g);
3848         if (err)
3849                 return err;
3850
3851         gr->initialized = true;
3852
3853         return 0;
3854 }
3855
3856 #define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE   0x02dc
3857 #define NVA297_SET_CIRCULAR_BUFFER_SIZE         0x1280
3858 #define NVA297_SET_SHADER_EXCEPTIONS            0x1528
3859
3860 #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
3861
3862 struct gr_isr_data {
3863         u32 addr;
3864         u32 data_lo;
3865         u32 data_hi;
3866         u32 curr_ctx;
3867         u32 chid;
3868         u32 offset;
3869         u32 sub_chan;
3870         u32 class_num;
3871 };
3872
3873 static void gk20a_gr_set_shader_exceptions(struct gk20a *g,
3874                                            struct gr_isr_data *isr_data)
3875 {
3876         u32 val;
3877
3878         nvhost_dbg_fn("");
3879
3880         if (isr_data->data_lo ==
3881             NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE)
3882                 val = 0;
3883         else
3884                 val = ~0;
3885
3886         gk20a_writel(g,
3887                 gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r(),
3888                 val);
3889         gk20a_writel(g,
3890                 gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r(),
3891                 val);
3892 }
3893
3894 static void gk20a_gr_set_circular_buffer_size(struct gk20a *g,
3895                         struct gr_isr_data *isr_data)
3896 {
3897         struct gr_gk20a *gr = &g->gr;
3898         u32 gpc_index, ppc_index, stride, val, offset;
3899         u32 cb_size = isr_data->data_lo * 4;
3900
3901         nvhost_dbg_fn("");
3902
3903         if (cb_size > gr->attrib_cb_size)
3904                 cb_size = gr->attrib_cb_size;
3905
3906         gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
3907                 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
3908                  ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
3909                  gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));
3910
3911         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3912                 stride = proj_gpc_stride_v() * gpc_index;
3913
3914                 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
3915                         ppc_index++) {
3916
3917                         val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg_r() +
3918                                 stride +
3919                                 proj_ppc_in_gpc_stride_v() * ppc_index);
3920
3921                         offset = gr_gpc0_ppc0_cbm_cfg_start_offset_v(val);
3922
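                        /* Update the size field, then write the register
                         * twice: once with start_offset temporarily bumped to
                         * offset + 1 and once with it restored.  The double
                         * write appears to be what makes the CBM latch the
                         * new beta CB size. */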
3923                         val = set_field(val,
3924                                 gr_gpc0_ppc0_cbm_cfg_size_m(),
3925                                 gr_gpc0_ppc0_cbm_cfg_size_f(cb_size *
3926                                         gr->pes_tpc_count[ppc_index][gpc_index]));
3927                         val = set_field(val,
3928                                 gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
3929                                 (offset + 1));
3930
3931                         gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
3932                                 stride +
3933                                 proj_ppc_in_gpc_stride_v() * ppc_index, val);
3934
3935                         val = set_field(val,
3936                                 gr_gpc0_ppc0_cbm_cfg_start_offset_m(),
3937                                 offset);
3938
3939                         gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg_r() +
3940                                 stride +
3941                                 proj_ppc_in_gpc_stride_v() * ppc_index, val);
3942                 }
3943         }
3944 }
3945
3946 static void gk20a_gr_set_alpha_circular_buffer_size(struct gk20a *g,
3947                                                 struct gr_isr_data *isr_data)
3948 {
3949         struct gr_gk20a *gr = &g->gr;
3950         u32 gpc_index, ppc_index, stride, val;
3951         u32 pd_ab_max_output;
3952         u32 alpha_cb_size = isr_data->data_lo * 4;
3953
3954         nvhost_dbg_fn("");
3955         /* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
3956                 return; */
3957
3958         if (alpha_cb_size > gr->alpha_cb_size)
3959                 alpha_cb_size = gr->alpha_cb_size;
3960
3961         gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
3962                 (gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
3963                  ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
3964                  gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));
3965
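        /* Convert the alpha CB size from CBM size-granularity units into the
         * PD alpha/beta distribution's max-output granularity before
         * programming gr_pd_ab_dist_cfg1. */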
3966         pd_ab_max_output = alpha_cb_size *
3967                 gr_gpc0_ppc0_cbm_cfg_size_granularity_v() /
3968                 gr_pd_ab_dist_cfg1_max_output_granularity_v();
3969
3970         gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
3971                 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output));
3972
3973         for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3974                 stride = proj_gpc_stride_v() * gpc_index;
3975
3976                 for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
3977                         ppc_index++) {
3978
3979                         val = gk20a_readl(g, gr_gpc0_ppc0_cbm_cfg2_r() +
3980                                 stride +
3981                                 proj_ppc_in_gpc_stride_v() * ppc_index);
3982
3983                         val = set_field(val, gr_gpc0_ppc0_cbm_cfg2_size_m(),
3984                                         gr_gpc0_ppc0_cbm_cfg2_size_f(alpha_cb_size *
3985                                                 gr->pes_tpc_count[ppc_index][gpc_index]));
3986
3987                         gk20a_writel(g, gr_gpc0_ppc0_cbm_cfg2_r() +
3988                                 stride +
3989                                 proj_ppc_in_gpc_stride_v() * ppc_index, val);
3990                 }
3991         }
3992 }
3993
3994 static int gk20a_gr_handle_illegal_method(struct gk20a *g,
3995                                           struct gr_isr_data *isr_data)
3996 {
3997         nvhost_dbg_fn("");
3998
3999         if (isr_data->class_num == KEPLER_C) {
4000                 switch (isr_data->offset << 2) {
4001                 case NVA297_SET_SHADER_EXCEPTIONS:
4002                         gk20a_gr_set_shader_exceptions(g, isr_data);
4003                         break;
4004                 case NVA297_SET_CIRCULAR_BUFFER_SIZE:
4005                         gk20a_gr_set_circular_buffer_size(g, isr_data);
4006                         break;
4007                 case NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
4008                         gk20a_gr_set_alpha_circular_buffer_size(g, isr_data);
4009                         break;
4010                 default:
4011                         nvhost_err(dev_from_gk20a(g), "unhandled method, "
4012                                    "class 0x%08x, offset 0x%08x",
4013                                    isr_data->class_num, isr_data->offset);
4014                         return -EINVAL;
4015                 }
4016                 return 0;
4017         }
4018
4019         nvhost_err(dev_from_gk20a(g),
4020                    "invalid method class 0x%08x, offset 0x%08x",
4021                    isr_data->class_num, isr_data->offset);
4022         return -EINVAL;
4023 }
4024
4025 static int gk20a_gr_handle_notify_pending(struct gk20a *g,
4026                                           struct gr_isr_data *isr_data)
4027 {
4028         struct fifo_gk20a *f = &g->fifo;
4029         struct channel_gk20a *ch = &f->channel[isr_data->chid];
4030
4031         nvhost_dbg_fn("");
4032
4033         wake_up(&ch->notifier_wq);
4034
4035         return 0;
4036 }
4037
4038 /* Used by the sw interrupt thread to translate the current ctx to a chid.
4039  * For performance we don't want to scan all 128 channels every time, so a
4040  * small TLB caches recent translations. */
4041 static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
4042 {
4043         struct fifo_gk20a *f = &g->fifo;
4044         struct gr_gk20a *gr = &g->gr;
4045         u32 chid;
4046         u32 i;
4047
4048         /* check cache first */
4049         for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++)
4050                 if (gr->chid_tlb[i].curr_ctx == curr_ctx)
4051                         return gr->chid_tlb[i].hw_chid;
4052
4053         /* slow path */
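        /* curr_ctx is the channel's instance block base address in
         * ram_in_base_shift (4 KB) units, so widen it to 64 bits and shift it
         * back up before comparing against each channel's inst block PA. */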
4054         for (chid = 0; chid < f->num_channels; chid++)
4055                 if (f->channel[chid].inst_block.cpu_pa ==
4056                     (u64)curr_ctx << ram_in_base_shift_v())
4057                         break;
4058
4059         if (chid >= f->num_channels)
4060                 return -1;
4061
4062         /* add to a free tlb entry */
4063         for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++)
4064                 if (gr->chid_tlb[i].curr_ctx == 0) {
4065                         gr->chid_tlb[i].curr_ctx = curr_ctx;
4066                         gr->chid_tlb[i].hw_chid = chid;
4067                         return chid;
4068                 }
4069
4070         /* no free entry, flush one */
4071         gr->chid_tlb[gr->channel_tlb_flush_index].curr_ctx = curr_ctx;
4072         gr->chid_tlb[gr->channel_tlb_flush_index].hw_chid = chid;
4073
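        /* Round-robin eviction; relies on GR_CHANNEL_MAP_TLB_SIZE being a
         * power of two so that the mask below wraps the index. */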
4074         gr->channel_tlb_flush_index =
4075                 (gr->channel_tlb_flush_index + 1) &
4076                 (GR_CHANNEL_MAP_TLB_SIZE - 1);
4077
4078         return chid;
4079 }
4080
4081 void gk20a_gr_isr(struct gk20a *g)
4082 {
4083         struct gr_isr_data isr_data;
4084         u32 grfifo_ctl;
4085         u32 obj_table;
4086         int ret;
4087         u32 gr_intr = gk20a_readl(g, gr_intr_r());
4088
4089         nvhost_dbg_fn("");
4090
4091         if (!gr_intr)
4092                 return;
4093
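        /* Block method and semaphore processing through the GP FIFO while
         * the trapped method state is read out and handled; access is
         * restored at clean_up below. */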
4094         grfifo_ctl = gk20a_readl(g, gr_gpfifo_ctl_r());
4095         grfifo_ctl &= ~gr_gpfifo_ctl_semaphore_access_f(1);
4096         grfifo_ctl &= ~gr_gpfifo_ctl_access_f(1);
4097
4098         gk20a_writel(g, gr_gpfifo_ctl_r(),
4099                 grfifo_ctl | gr_gpfifo_ctl_access_f(0) |
4100                 gr_gpfifo_ctl_semaphore_access_f(0));
4101
4102         isr_data.addr = gk20a_readl(g, gr_trapped_addr_r());
4103         isr_data.data_lo = gk20a_readl(g, gr_trapped_data_lo_r());
4104         isr_data.data_hi = gk20a_readl(g, gr_trapped_data_hi_r());
4105         isr_data.curr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r());
4106         isr_data.offset = gr_trapped_addr_mthd_v(isr_data.addr);
4107         isr_data.sub_chan = gr_trapped_addr_subch_v(isr_data.addr);
4108         obj_table = gk20a_readl(g,
4109                 gr_fe_object_table_r(isr_data.sub_chan));
4110         isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table);
4111
4112         isr_data.chid =
4113                 gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx);
4114         if (isr_data.chid == -1) {
4115                 nvhost_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x",
4116                            isr_data.curr_ctx);
4117                 goto clean_up;
4118         }
4119
4120         nvhost_dbg(dbg_intr, "channel %d: addr 0x%08x, "
4121                 "data 0x%08x 0x%08x, "
4122                 "ctx 0x%08x, offset 0x%08x, "
4123                 "subchannel 0x%08x, class 0x%08x",
4124                 isr_data.chid, isr_data.addr,
4125                 isr_data.data_hi, isr_data.data_lo,
4126                 isr_data.curr_ctx, isr_data.offset,
4127                 isr_data.sub_chan, isr_data.class_num);
4128
4129         if (gr_intr & gr_intr_notify_pending_f()) {
4130                 gk20a_gr_handle_notify_pending(g, &isr_data);
4131                 gk20a_writel(g, gr_intr_r(),
4132                         gr_intr_notify_reset_f());
4133                 gr_intr &= ~gr_intr_notify_pending_f();
4134         }
4135
4136         if (gr_intr & gr_intr_illegal_method_pending_f()) {
4137                 ret = gk20a_gr_handle_illegal_method(g, &isr_data);
4138                 if (!ret) {
4139                         gk20a_writel(g, gr_intr_r(),
4140                                 gr_intr_illegal_method_reset_f());
4141                         gr_intr &= ~gr_intr_illegal_method_pending_f();
4142                 }
4143         }
4144
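        /* Any bits still set in gr_intr were not handled above and are
         * reported as unhandled once gpfifo access has been re-enabled. */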
4145 clean_up:
4146         gk20a_writel(g, gr_gpfifo_ctl_r(),
4147                 grfifo_ctl | gr_gpfifo_ctl_access_f(1) |
4148                 gr_gpfifo_ctl_semaphore_access_f(1));
4149
4150         if (gr_intr)
4151                 nvhost_err(dev_from_gk20a(g),
4152                            "unhandled gr interrupt 0x%08x", gr_intr);
4153 }
4154
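/*
 * FECS methods for the engine register-list image, apparently used when the
 * PMU saves/restores GR state: query the image size, bind the instance
 * block that backs it (the address is passed in 4 KB units, hence the
 * >> 12), and hand FECS its virtual address in 256-byte units (hence the
 * >> 8 of pmu_va).
 */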
4155 int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
4156 {
4157         BUG_ON(size == NULL);
4158         return gr_gk20a_submit_fecs_method(g, 0, 0, ~0, 1,
4159                 gr_fecs_method_push_adr_discover_reglist_image_size_v(),
4160                 size, GR_IS_UCODE_OP_NOT_EQUAL, 0, GR_IS_UCODE_OP_SKIP, 0);
4161 }
4162
4163 int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
4164 {
4165         return gr_gk20a_submit_fecs_method(g, 4,
4166                 gr_fecs_current_ctx_ptr_f(addr >> 12) |
4167                 gr_fecs_current_ctx_valid_f(1) | gr_fecs_current_ctx_target_vid_mem_f(),
4168                 ~0, 1, gr_fecs_method_push_adr_set_reglist_bind_instance_f(),
4169                 0, GR_IS_UCODE_OP_EQUAL, 1, GR_IS_UCODE_OP_SKIP, 0);
4170 }
4171
4172 int gr_gk20a_fecs_set_reglist_virual_addr(struct gk20a *g, u64 pmu_va)
4173 {
4174         return gr_gk20a_submit_fecs_method(g, 4, u64_lo32(pmu_va >> 8),
4175                 ~0, 1, gr_fecs_method_push_adr_set_reglist_virtual_address_f(),
4176                 0, GR_IS_UCODE_OP_EQUAL, 1, GR_IS_UCODE_OP_SKIP, 0);
4177 }