gpu: nvgpu: support gk20a virtualization
drivers/gpu/nvgpu/vgpu/gr_vgpu.c
/*
 * Virtualized GPU Graphics
 *
 * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include "vgpu/vgpu.h"
#include "gk20a/hw_gr_gk20a.h"

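/*
 * Ask the vgpu server to commit the gr context for this channel.  The
 * gpu_va argument keeps the native hook signature; it is not forwarded,
 * the server works from the channel handle alone.
 */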
static int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va)
{
        struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_CTX;
        msg.handle = platform->virt_handle;
        p->handle = c->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        return (err || msg.ret) ? -1 : 0;
}

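/*
 * Ask the server to commit the global context buffers for this channel.
 * The "patch" argument keeps the native signature and is not forwarded.
 */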
static int vgpu_gr_commit_global_ctx_buffers(struct gk20a *g,
                                        struct channel_gk20a *c, bool patch)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_GLOBAL_CTX;
        msg.handle = platform->virt_handle;
        p->handle = c->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        return (err || msg.ret) ? -1 : 0;
}

/* load a saved fresh copy of the golden image into the channel gr_ctx */
static int vgpu_gr_load_golden_ctx_image(struct gk20a *g,
                                        struct channel_gk20a *c)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_LOAD_GR_GOLDEN_CTX;
        msg.handle = platform->virt_handle;
        p->handle = c->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        return (err || msg.ret) ? -1 : 0;
}

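/*
 * Query the golden context and zcull context-switch image sizes from the
 * server; fail if either comes back as zero.
 */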
static int vgpu_gr_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        vgpu_get_attribute(platform->virt_handle,
                        TEGRA_VGPU_ATTRIB_GOLDEN_CTX_SIZE,
                        &g->gr.ctx_vars.golden_image_size);
        vgpu_get_attribute(platform->virt_handle,
                        TEGRA_VGPU_ATTRIB_ZCULL_CTX_SIZE,
                        &g->gr.ctx_vars.zcull_ctxsw_image_size);
        if (!g->gr.ctx_vars.golden_image_size ||
                !g->gr.ctx_vars.zcull_ctxsw_image_size)
                return -ENXIO;

        gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size;
        g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
        return 0;
}

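/*
 * Record the sizes of the global context buffers.  No memory is allocated
 * here; gpu_va ranges are only reserved later, in
 * vgpu_gr_map_global_ctx_buffers().
 */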
static int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g)
{
        struct gr_gk20a *gr = &g->gr;
        int attr_buffer_size;

        u32 cb_buffer_size = gr->bundle_cb_default_size *
                gr_scc_bundle_cb_size_div_256b_byte_granularity_v();

        u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
                gr_scc_pagepool_total_pages_byte_granularity_v();

        gk20a_dbg_fn("");

        attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);

        gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size);
        gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size;

        gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
        gr->global_ctx_buffer[PAGEPOOL].size = pagepool_buffer_size;

        gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);
        gr->global_ctx_buffer[ATTRIBUTE].size = attr_buffer_size;

        gk20a_dbg_info("priv access map size : %d",
                gr->ctx_vars.priv_access_map_size);
        gr->global_ctx_buffer[PRIV_ACCESS_MAP].size =
                gr->ctx_vars.priv_access_map_size;

        return 0;
}

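/*
 * Reserve gpu_va ranges in the channel VM for the circular, attribute,
 * pagepool and priv-access-map buffers, then hand the addresses to the
 * server with TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX.  On failure every
 * va reserved so far is released.
 */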
static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
                                        struct channel_gk20a *c)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
        struct vm_gk20a *ch_vm = c->vm;
        u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
        u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
        struct gr_gk20a *gr = &g->gr;
        u64 gpu_va;
        u32 i;
        int err;

        gk20a_dbg_fn("");

        /* FIXME: add VPR support */

        /* Circular Buffer */
        gpu_va = gk20a_vm_alloc_va(ch_vm,
                                gr->global_ctx_buffer[CIRCULAR].size, 0);

        if (!gpu_va)
                goto clean_up;
        g_bfr_va[CIRCULAR_VA] = gpu_va;
        g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].size;

        /* Attribute Buffer */
        gpu_va = gk20a_vm_alloc_va(ch_vm,
                                gr->global_ctx_buffer[ATTRIBUTE].size, 0);

        if (!gpu_va)
                goto clean_up;
        g_bfr_va[ATTRIBUTE_VA] = gpu_va;
        g_bfr_size[ATTRIBUTE_VA] = gr->global_ctx_buffer[ATTRIBUTE].size;

        /* Page Pool */
        gpu_va = gk20a_vm_alloc_va(ch_vm,
                                gr->global_ctx_buffer[PAGEPOOL].size, 0);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[PAGEPOOL_VA] = gpu_va;
        g_bfr_size[PAGEPOOL_VA] = gr->global_ctx_buffer[PAGEPOOL].size;

        /* Priv register Access Map */
        gpu_va = gk20a_vm_alloc_va(ch_vm,
                                gr->global_ctx_buffer[PRIV_ACCESS_MAP].size, 0);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
        g_bfr_size[PRIV_ACCESS_MAP_VA] =
                gr->global_ctx_buffer[PRIV_ACCESS_MAP].size;

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX;
        msg.handle = platform->virt_handle;
        p->handle = c->virt_ctx;
        p->cb_va = g_bfr_va[CIRCULAR_VA];
        p->attr_va = g_bfr_va[ATTRIBUTE_VA];
        p->page_pool_va = g_bfr_va[PAGEPOOL_VA];
        p->priv_access_map_va = g_bfr_va[PRIV_ACCESS_MAP_VA];
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                goto clean_up;

        c->ch_ctx.global_ctx_buffer_mapped = true;
        return 0;

 clean_up:
        for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
                if (g_bfr_va[i]) {
                        gk20a_vm_free_va(ch_vm, g_bfr_va[i],
                                        g_bfr_size[i], 0);
                        g_bfr_va[i] = 0;
                }
        }
        return -ENOMEM;
}

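/*
 * Undo vgpu_gr_map_global_ctx_buffers(): tell the server to unmap the
 * global context buffers and release the channel VM reservations.
 */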
static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c)
{
        struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
        struct vm_gk20a *ch_vm = c->vm;
        u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
        u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
        u32 i;

        gk20a_dbg_fn("");

        if (c->ch_ctx.global_ctx_buffer_mapped) {
                struct tegra_vgpu_cmd_msg msg;
                struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
                int err;

                msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX;
                msg.handle = platform->virt_handle;
                p->handle = c->virt_ctx;
                err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
                WARN_ON(err || msg.ret);
        }

        for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
                if (g_bfr_va[i]) {
                        gk20a_vm_free_va(ch_vm, g_bfr_va[i], g_bfr_size[i], 0);
                        g_bfr_va[i] = 0;
                        g_bfr_size[i] = 0;
                }
        }
        c->ch_ctx.global_ctx_buffer_mapped = false;
}

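/*
 * Allocate the per-channel gr context: reserve a gpu_va sized after the
 * golden image and ask the server to allocate a context of the requested
 * class at that address.
 */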
static int vgpu_gr_alloc_channel_gr_ctx(struct gk20a *g,
                                        struct channel_gk20a *c)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
        struct gr_gk20a *gr = &g->gr;
        struct gr_ctx_desc *gr_ctx;
        struct vm_gk20a *ch_vm = c->vm;
        int err;

        gk20a_dbg_fn("");

        if (gr->ctx_vars.buffer_size == 0)
                return 0;

        /* alloc channel gr ctx buffer */
        gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
        gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;

        gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL);
        if (!gr_ctx)
                return -ENOMEM;

        gr_ctx->size = gr->ctx_vars.buffer_total_size;
        gr_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, gr_ctx->size, 0);

        if (!gr_ctx->gpu_va) {
                kfree(gr_ctx);
                return -ENOMEM;
        }

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_CTX;
        msg.handle = platform->virt_handle;
        p->handle = c->virt_ctx;
        p->gr_ctx_va = gr_ctx->gpu_va;
        p->class_num = c->obj_class;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        if (err || msg.ret) {
                /* server-side allocation failed: release the va and the
                 * descriptor so nothing is leaked */
                gk20a_vm_free_va(ch_vm, gr_ctx->gpu_va, gr_ctx->size, 0);
                kfree(gr_ctx);
                err = -ENOMEM;
        } else
                c->ch_ctx.gr_ctx = gr_ctx;

        return err;
}

static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c)
{
        struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
        struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
        struct vm_gk20a *ch_vm = c->vm;

        gk20a_dbg_fn("");

        if (ch_ctx->gr_ctx && ch_ctx->gr_ctx->gpu_va) {
                struct tegra_vgpu_cmd_msg msg;
                struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
                int err;

                msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_CTX;
                msg.handle = platform->virt_handle;
                p->handle = c->virt_ctx;
                err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
                WARN_ON(err || msg.ret);

                gk20a_vm_free_va(ch_vm, ch_ctx->gr_ctx->gpu_va,
                                ch_ctx->gr_ctx->size, 0);
                ch_ctx->gr_ctx->gpu_va = 0;
                kfree(ch_ctx->gr_ctx);
        }
}

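/*
 * Reserve a small (128-word) gpu_va range for the patch context and ask
 * the server to allocate the patch buffer at that address.
 */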
static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
                                        struct channel_gk20a *c)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
        struct vm_gk20a *ch_vm = c->vm;
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
        int err;

        gk20a_dbg_fn("");

        patch_ctx->size = 128 * sizeof(u32);
        patch_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, patch_ctx->size, 0);
        if (!patch_ctx->gpu_va)
                return -ENOMEM;

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX;
        msg.handle = platform->virt_handle;
        p->handle = c->virt_ctx;
        p->patch_ctx_va = patch_ctx->gpu_va;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret) {
                gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
                err = -ENOMEM;
        }

        return err;
}

static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
{
        struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
        struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
        struct vm_gk20a *ch_vm = c->vm;

        gk20a_dbg_fn("");

        if (patch_ctx->gpu_va) {
                struct tegra_vgpu_cmd_msg msg;
                struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
                int err;

                msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX;
                msg.handle = platform->virt_handle;
                p->handle = c->virt_ctx;
                err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
                WARN_ON(err || msg.ret);

                gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
                patch_ctx->gpu_va = 0;
        }
}

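/*
 * Release everything vgpu_gr_alloc_obj_ctx() set up for the channel.  The
 * gr context itself is only freed for non-TSG channels.
 */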
static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
{
        gk20a_dbg_fn("");

        vgpu_gr_unmap_global_ctx_buffers(c);
        vgpu_gr_free_channel_patch_ctx(c);
        if (!gk20a_is_channel_marked_as_tsg(c))
                vgpu_gr_free_channel_gr_ctx(c);

        /* zcull_ctx, pm_ctx */

        memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));

        c->num_objects = 0;
        c->first_init = false;
}

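/*
 * Set up everything the channel needs to run objects of the requested gr
 * class: gr context, patch context and global context buffer mappings,
 * plus a one-time golden image load.  The actual work is forwarded to the
 * vgpu server by the helpers above.
 */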
static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
                                struct nvhost_alloc_obj_ctx_args *args)
{
        struct gk20a *g = c->g;
        struct fifo_gk20a *f = &g->fifo;
        struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
        struct tsg_gk20a *tsg = NULL;
        int err = 0;

        gk20a_dbg_fn("");

        /* an address space needs to have been bound at this point. */
        if (!gk20a_channel_as_bound(c)) {
                gk20a_err(dev_from_gk20a(g),
                           "not bound to address space at time"
                           " of grctx allocation");
                return -EINVAL;
        }

        if (!g->ops.gr.is_valid_class(g, args->class_num)) {
                gk20a_err(dev_from_gk20a(g),
                           "invalid obj class 0x%x", args->class_num);
                err = -EINVAL;
                goto out;
        }
        c->obj_class = args->class_num;

        /* FIXME: add TSG support */
        if (gk20a_is_channel_marked_as_tsg(c))
                tsg = &f->tsg[c->tsgid];

        /* allocate gr ctx buffer */
        if (!ch_ctx->gr_ctx) {
                err = vgpu_gr_alloc_channel_gr_ctx(g, c);
                if (err) {
                        gk20a_err(dev_from_gk20a(g),
                                "fail to allocate gr ctx buffer");
                        goto out;
                }
        } else {
                /* TBD: needs to be more subtle about which class is
                 * being allocated, as some are allowed to be
                 * allocated along the same channel */
                gk20a_err(dev_from_gk20a(g),
                        "too many classes alloc'd on same channel");
                err = -EINVAL;
                goto out;
        }

        /* commit gr ctx buffer */
        err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
        if (err) {
                gk20a_err(dev_from_gk20a(g),
                        "fail to commit gr ctx buffer");
                goto out;
        }

        /* allocate patch buffer */
        if (ch_ctx->patch_ctx.pages == NULL) {
                err = vgpu_gr_alloc_channel_patch_ctx(g, c);
                if (err) {
                        gk20a_err(dev_from_gk20a(g),
                                "fail to allocate patch buffer");
                        goto out;
                }
        }

        /* map the global buffers to the channel's gpu_va and commit them */
        if (!ch_ctx->global_ctx_buffer_mapped) {
                err = vgpu_gr_map_global_ctx_buffers(g, c);
                if (err) {
                        gk20a_err(dev_from_gk20a(g),
                                "fail to map global ctx buffer");
                        goto out;
                }
                gr_gk20a_elpg_protected_call(g,
                                vgpu_gr_commit_global_ctx_buffers(g, c, true));
        }

        /* load golden image */
        if (!c->first_init) {
                err = gr_gk20a_elpg_protected_call(g,
                                vgpu_gr_load_golden_ctx_image(g, c));
                if (err) {
                        gk20a_err(dev_from_gk20a(g),
                                "fail to load golden ctx image");
                        goto out;
                }
                c->first_init = true;
        }

        c->num_objects++;

        gk20a_dbg_fn("done");
        return 0;
out:
        /* 1. gr_ctx, patch_ctx and the global ctx buffer mapping
           can be reused, so there is no need to release them.
           2. golden image load is a one-time thing, so if it
           passed, there is no need to undo it. */
        gk20a_err(dev_from_gk20a(g), "fail");
        return err;
}

static int vgpu_gr_free_obj_ctx(struct channel_gk20a *c,
                                struct nvhost_free_obj_ctx_args *args)
{
        unsigned long timeout = gk20a_get_gr_idle_timeout(c->g);

        gk20a_dbg_fn("");

        if (c->num_objects == 0)
                return 0;

        c->num_objects--;

        if (c->num_objects == 0) {
                c->first_init = false;
                gk20a_disable_channel(c,
                        !c->has_timedout,
                        timeout);
        }

        return 0;
}

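/*
 * Pull the GPC/TPC configuration from the server and let the per-chip ops
 * derive the default circular buffer and global context buffer sizes.
 */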
static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        if (vgpu_get_attribute(platform->virt_handle,
                        TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count))
                return -ENOMEM;

        if (vgpu_get_attribute(platform->virt_handle,
                        TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT,
                        &gr->max_tpc_per_gpc_count))
                return -ENOMEM;

        if (vgpu_get_attribute(platform->virt_handle,
                        TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT,
                        &gr->max_tpc_count))
                return -ENOMEM;

        g->ops.gr.bundle_cb_defaults(g);
        g->ops.gr.cb_size_default(g);
        g->ops.gr.calc_global_ctx_buffer_size(g);
        return 0;
}

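/*
 * Forward a zcull context bind request (buffer va and mode) for this
 * channel to the server.
 */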
static int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
                                struct channel_gk20a *c, u64 zcull_va,
                                u32 mode)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_ZCULL;
        msg.handle = platform->virt_handle;
        p->handle = c->virt_ctx;
        p->zcull_va = zcull_va;
        p->mode = mode;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        return (err || msg.ret) ? -ENOMEM : 0;
}

static int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
                                struct gr_zcull_info *zcull_params)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_GET_ZCULL_INFO;
        msg.handle = platform->virt_handle;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                return -ENOMEM;

        zcull_params->width_align_pixels = p->width_align_pixels;
        zcull_params->height_align_pixels = p->height_align_pixels;
        zcull_params->pixel_squares_by_aliquots = p->pixel_squares_by_aliquots;
        zcull_params->aliquot_total = p->aliquot_total;
        zcull_params->region_byte_multiplier = p->region_byte_multiplier;
        zcull_params->region_header_size = p->region_header_size;
        zcull_params->subregion_header_size = p->subregion_header_size;
        zcull_params->subregion_width_align_pixels =
                p->subregion_width_align_pixels;
        zcull_params->subregion_height_align_pixels =
                p->subregion_height_align_pixels;
        zcull_params->subregion_count = p->subregion_count;

        return 0;
}

static void vgpu_remove_gr_support(struct gr_gk20a *gr)
{
        gk20a_dbg_fn("");

        gk20a_allocator_destroy(&gr->comp_tags);
}

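/*
 * One-time software setup for gr under virtualization: query the config
 * and context sizes from the server, initialize comptags and size the
 * global context buffers.
 */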
static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
{
        struct gr_gk20a *gr = &g->gr;
        int err;

        gk20a_dbg_fn("");

        if (gr->sw_ready) {
                gk20a_dbg_fn("skip init");
                return 0;
        }

        gr->g = g;

        err = vgpu_gr_init_gr_config(g, gr);
        if (err)
                goto clean_up;

        err = vgpu_gr_init_ctx_state(g, gr);
        if (err)
                goto clean_up;

        err = g->ops.ltc.init_comptags(g, gr);
        if (err)
                goto clean_up;

        err = vgpu_gr_alloc_global_ctx_buffers(g);
        if (err)
                goto clean_up;

        mutex_init(&gr->ctx_mutex);

        gr->remove_support = vgpu_remove_gr_support;
        gr->sw_ready = true;

        gk20a_dbg_fn("done");
        return 0;

clean_up:
        gk20a_err(dev_from_gk20a(g), "fail");
        vgpu_remove_gr_support(gr);
        return err;
}

int vgpu_init_gr_support(struct gk20a *g)
{
        gk20a_dbg_fn("");

        return vgpu_gr_init_gr_setup_sw(g);
}

struct gr_isr_data {
        u32 addr;
        u32 data_lo;
        u32 data_hi;
        u32 curr_ctx;
        u32 chid;
        u32 offset;
        u32 sub_chan;
        u32 class_num;
};

static int vgpu_gr_handle_notify_pending(struct gk20a *g,
                                        struct gr_isr_data *isr_data)
{
        struct fifo_gk20a *f = &g->fifo;
        struct channel_gk20a *ch = &f->channel[isr_data->chid];

        gk20a_dbg_fn("");
        wake_up(&ch->notifier_wq);
        return 0;
}

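/*
 * Entry point for gr interrupt events delivered through the vgpu
 * interface.  Only notify interrupts are handled so far.
 */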
int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
{
        struct gr_isr_data isr_data;

        gk20a_dbg_fn("");

        isr_data.chid = info->chid;

        if (info->type == TEGRA_VGPU_GR_INTR_NOTIFY)
                vgpu_gr_handle_notify_pending(g, &isr_data);

        return 0;
}

void vgpu_init_gr_ops(struct gpu_ops *gops)
{
        gops->gr.free_channel_ctx = vgpu_gr_free_channel_ctx;
        gops->gr.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx;
        gops->gr.free_obj_ctx = vgpu_gr_free_obj_ctx;
        gops->gr.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull;
        gops->gr.get_zcull_info = vgpu_gr_get_zcull_info;
}