drivers: video: tegra: change buffer gpu cacheability and add flushes
Jin Qian [Tue, 11 Sep 2012 18:54:40 +0000 (11:54 -0700)]
default kernel buffers to gpu non-cacheable with no flush.
circular/pagepool/attribute buffers are gpu cacheable but need no
flush because the driver never accesses them from the cpu.
channel gr_ctx is gpu cacheable and needs flushes around cpu access.
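
In sketch form, the access pattern this change introduces around cpu
access to the gpu-cacheable gr_ctx buffer (names taken from the
gr_gk20a.c hunks below; offset/value are placeholders):

    gk20a_mm_fb_flush(g);
    gk20a_mm_l2_flush(g, true);   /* flush + invalidate before cpu write */
    mem_wr32(ctx_ptr + offset, 0, value);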

Bug 1004057

Change-Id: If7bfce46cb70da3b9b5e867bd99ffb08ae1bd27d
Signed-off-by: Jin Qian <jqian@nvidia.com>
Reviewed-on: http://git-master/r/131518
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Mark Stadler <mastadler@nvidia.com>

drivers/video/tegra/host/gk20a/channel_gk20a.c
drivers/video/tegra/host/gk20a/fifo_gk20a.c
drivers/video/tegra/host/gk20a/gr_gk20a.c
drivers/video/tegra/host/gk20a/mm_gk20a.c
drivers/video/tegra/host/gk20a/pmu_gk20a.c
include/linux/nvhost_as_ioctl.h
include/linux/nvhost_ioctl.h

index f4ce850..1e3ce2d 100644 (file)
@@ -653,7 +653,8 @@ static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
 
        q->base_gva = ch_vm->map(ch_vm, memmgr,
                        q->mem.ref,
-                       0, 0, 0 /*offset_align, flags, kind*/);
+                       /*offset_align, flags, kind*/
+                       0, 0, 0);
        if (!q->base_gva) {
                nvhost_err(d, "ch %d : failed to map gpu va "
                           "for priv cmd buffer", c->hw_chid);
@@ -936,8 +937,9 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
        c->gpfifo.get = c->gpfifo.put = 0;
 
        c->gpfifo.gpu_va = ch_vm->map(ch_vm, memmgr,
-                                     c->gpfifo.mem.ref,
-                                     0, 0 /*offset_align, flags*/, 0);
+                               c->gpfifo.mem.ref,
+                               /*offset_align, flags, kind*/
+                               0, 0, 0);
        if (!c->gpfifo.gpu_va) {
                nvhost_err(d, "channel %d : failed to map"
                           " gpu_va for gpfifo", c->hw_chid);
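
With the flag rename in nvhost_ioctl.h at the end of this change, a
flags value of 0 now selects NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_FALSE
(0x0), so the priv cmd and gpfifo buffers above stay gpu non-cacheable
by default. A minimal sketch of the call shape (mem_ref is a
placeholder):

    gpu_va = ch_vm->map(ch_vm, memmgr, mem_ref,
                        /*offset_align, flags, kind*/
                        0, 0, 0);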
index a822409..7cc2914 100644 (file)
@@ -336,9 +336,8 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g, bool reinit)
        f->userd.gpu_va = g->mm.bar1.vm.map(&g->mm.bar1.vm,
                                            memmgr,
                                            f->userd.mem.ref,
-                                           4096,
-                                           NVHOST_MAP_BUFFER_FLAGS_CACHABLE_FALSE,
-                                           0);
+                                           /*offset_align, flags, kind*/
+                                           4096, 0, 0);
        nvhost_dbg_info("userd bar1 va = 0x%llx", f->userd.gpu_va);
 
        f->userd.mem.size = f->userd_total_size;
index 7d9a2fa..7371c0c 100644 (file)
@@ -409,6 +409,10 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 
        nvhost_dbg_fn("");
 
+       /* flush gpu caches before cpu writes gpu_va into the inst block */
+       gk20a_mm_fb_flush(c->g);
+       gk20a_mm_l2_flush(c->g, true);
+
        inst_ptr = mem_op().mmap(c->inst_block.mem.ref);
        if (IS_ERR(inst_ptr)) {
                ret = -ENOMEM;
@@ -526,6 +530,11 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
                }
        }
 
+       /* Channel gr_ctx buffer is gpu cacheable.
+          Flush and invalidate before cpu update. */
+       gk20a_mm_fb_flush(g);
+       gk20a_mm_l2_flush(g, true);
+
        mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_v(), 0,
                 ch_ctx->zcull_ctx.ctx_sw_mode);
 
@@ -581,6 +590,11 @@ static int gr_gk20a_ctx_pm_setup(struct gk20a *g, struct channel_gk20a *c,
                disable_engine_activity(...);
        */
 
+       /* Channel gr_ctx buffer is gpu cacheable.
+          Flush and invalidate before cpu update. */
+       gk20a_mm_fb_flush(g);
+       gk20a_mm_l2_flush(g, true);
+
        mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_v(), 0, ch_ctx->pm_ctx.ctx_sw_mode);
        mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_v(), 0, va);
 
@@ -1218,6 +1232,11 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
        ctx_header_words =  roundup(ctx_header_bytes, sizeof(u32));
        ctx_header_words >>= 2;
 
+       /* Channel gr_ctx buffer is gpu cacheable.
+          Flush before cpu read. */
+       gk20a_mm_fb_flush(g);
+       gk20a_mm_l2_flush(g, false);
+
        for (i = 0; i < ctx_header_words; i++) {
                data = mem_rd32(ctx_ptr, i);
                mem_wr32(gold_ptr, i, data);
@@ -1286,6 +1305,11 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
        if (gr->ctx_vars.local_golden_image == NULL)
                return -1;
 
+       /* Channel gr_ctx buffer is gpu cacheable.
+          Flush and invalidate before cpu update. */
+       gk20a_mm_fb_flush(g);
+       gk20a_mm_l2_flush(g, true);
+
        ctx_ptr = mem_op().mmap(ch_ctx->gr_ctx.mem.ref);
        if (IS_ERR(ctx_ptr))
                return -ENOMEM;
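
As used in the hunks above, the second argument to gk20a_mm_l2_flush()
selects whether the L2 lines are also invalidated after the flush:

    gk20a_mm_l2_flush(g, false);  /* flush only: cpu is about to read  */
    gk20a_mm_l2_flush(g, true);   /* flush + invalidate: cpu is about to write */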
@@ -1779,7 +1803,8 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 
        gpu_va = ch_vm->map(ch_vm, memmgr,
                            gr->global_ctx_buffer[CIRCULAR].ref,
-                           0, 0, 0 /*offset_align, flags, kind*/);
+                           /*offset_align, flags, kind*/
+                           0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[CIRCULAR_VA] = gpu_va;
@@ -1790,21 +1815,24 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
                handle_ref = gr->global_ctx_buffer[ATTRIBUTE_VPR].ref;
 
        gpu_va = ch_vm->map(ch_vm, memmgr, handle_ref,
-                           0, 0, 0 /*offset_align, flags, kind*/);
+                           /*offset_align, flags, kind*/
+                           0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[ATTRIBUTE_VA] = gpu_va;
 
        gpu_va = ch_vm->map(ch_vm, memmgr,
                            gr->global_ctx_buffer[PAGEPOOL].ref,
-                           0, 0, 0/*offset_align, flags, kind*/);
+                           /*offset_align, flags, kind*/
+                           0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[PAGEPOOL_VA] = gpu_va;
 
        gpu_va = ch_vm->map(ch_vm, memmgr,
                            gr->global_ctx_buffer[GOLDEN_CTX].ref,
-                           0, 0, 0 /*offset_align, flags, kind*/);
+                           /*offset_align, flags, kind*/
+                           0, 0, 0);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
@@ -1866,7 +1894,9 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
                return -ENOMEM;
 
        gr_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
-               gr_ctx->mem.ref, 0, 0, 0 /*offset_align, flags, kind*/);
+               gr_ctx->mem.ref,
+               /*offset_align, flags, kind*/
+               0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0);
        if (!gr_ctx->gpu_va) {
                mem_op().put(memmgr, gr_ctx->mem.ref);
                return -ENOMEM;
@@ -1905,7 +1935,8 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
 
        patch_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
                                patch_ctx->mem.ref,
-                               0, 0, 0 /*offset_align, flags, kind*/);
+                               /*offset_align, flags, kind*/
+                               0, 0, 0);
        if (!patch_ctx->gpu_va)
                goto clean_up;
 
index da7197e..c2f40a5 100644 (file)
@@ -856,7 +856,7 @@ static u64 gk20a_vm_map(struct vm_gk20a *vm,
                               map_offset, map_offset + bfr.size - 1,
                               bfr.kind_v,
                               bfr.ctag_offset,
-                              !(flags & NVHOST_MAP_BUFFER_FLAGS_CACHABLE_FALSE));
+                              flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE);
        if (err) {
                nvhost_err(d, "failed to update ptes on map");
                goto clean_up;
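
Note that the sense of BIT(2) inverts here: the old CACHABLE_FALSE bit
opted a mapping out of gpu cacheability, while the new CACHEABLE_TRUE
bit opts in, so a flags value of 0 changes meaning from cacheable to
non-cacheable. The two predicates side by side (flag names from the
header hunks below):

    /* old: cacheable unless the caller passed CACHABLE_FALSE */
    cacheable = !(flags & NVHOST_MAP_BUFFER_FLAGS_CACHABLE_FALSE);
    /* new: non-cacheable unless the caller passes CACHEABLE_TRUE */
    cacheable = flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE;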
index ecda887..fa3a289 100644 (file)
@@ -993,7 +993,8 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g, bool reinit)
        }
 
        pmu->ucode.pmu_va = vm->map(vm, memmgr, pmu->ucode.mem.ref,
-                       0, NVHOST_MAP_BUFFER_FLAGS_CACHABLE_FALSE, 0);
+                       /*offset_align, flags, kind*/
+                       0, 0, 0);
        if (!pmu->ucode.pmu_va) {
                nvhost_err(d, "failed to map pmu ucode memory!!");
                return err;
@@ -1022,7 +1023,8 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g, bool reinit)
        pmu->pg_buf.mem.size = size;
 
        pmu->pg_buf.pmu_va = vm->map(vm, memmgr, pmu->pg_buf.mem.ref,
-                       0, NVHOST_MAP_BUFFER_FLAGS_CACHABLE_FALSE, 0);
+                       /*offset_align, flags, kind*/
+                       0, 0, 0);
        if (!pmu->pg_buf.pmu_va) {
                nvhost_err(d, "failed to map fecs pg buffer");
                err = -ENOMEM;
@@ -1041,7 +1043,8 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g, bool reinit)
        }
 
        pmu->seq_buf.pmu_va = vm->map(vm, memmgr, pmu->seq_buf.mem.ref,
-                       0, NVHOST_MAP_BUFFER_FLAGS_CACHABLE_FALSE, 0);
+                       /*offset_align, flags, kind*/
+                       0, 0, 0);
        if (!pmu->seq_buf.pmu_va) {
                nvhost_err(d, "failed to map seq buffer");
                err = -ENOMEM;
index e0e7e1f..ff1552c 100644 (file)
@@ -105,7 +105,8 @@ struct nvhost_as_bind_channel_args {
  */
 struct nvhost_as_map_buffer_args {
        __u32 flags;          /* in/out */
-#define NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET 0x1
+#define NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET BIT(0)
+#define NVHOST_AS_MAP_BUFFER_FLAGS_CACHEABLE    BIT(2)
        __u32 nvmap_fd;       /* in */
        __u32 nvmap_handle;   /* in */
        __u32 page_size;      /* inout, 0:= best fit to buffer */
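
A hedged sketch of userspace opting in to a gpu-cacheable mapping with
the new flag; the ioctl request name and the fd/handle setup are
assumptions, not shown in this hunk:

    struct nvhost_as_map_buffer_args args = {0};
    args.flags        = NVHOST_AS_MAP_BUFFER_FLAGS_CACHEABLE; /* opt in */
    args.nvmap_fd     = nvmap_fd;  /* assumed: nvmap device fd */
    args.nvmap_handle = handle;    /* assumed: nvmap buffer handle */
    /* request name assumed from this header's conventions */
    err = ioctl(as_fd, NVHOST_AS_IOCTL_MAP_BUFFER, &args);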
index 5e0213b..e3335b8 100644 (file)
@@ -152,8 +152,8 @@ struct nvhost_map_buffer_args {
 #define NVHOST_MAP_BUFFER_FLAGS_OFFSET         BIT(0)
 #define NVHOST_MAP_BUFFER_FLAGS_KIND_PITCH     0x0
 #define NVHOST_MAP_BUFFER_FLAGS_KIND_SPECIFIED BIT(1)
-#define NVHOST_MAP_BUFFER_FLAGS_CACHABLE_TRUE  0x0
-#define NVHOST_MAP_BUFFER_FLAGS_CACHABLE_FALSE BIT(2)
+#define NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_FALSE 0x0
+#define NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE  BIT(2)
        __u32 nvmap_handle;
        union {
                __u64 offset; /* valid if _offset flag given (in|out) */