video: tegra: host: Implement SMMU support in gk20a
Terje Bergstrom [Mon, 1 Apr 2013 15:58:32 +0000 (18:58 +0300)]
Implement nvhost_memmgr_sg_table(), which takes advantage of
nvmap_sg_table(). Replace all calls to nvhost_memmgr_pin() with it to
get access to the physical addresses of buffers.
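
A minimal sketch of the new lookup path (not part of this change;
memmgr and mem are stand-ins for the caller's mem_mgr and mem_desc):

    struct sg_table *sgt;
    phys_addr_t pa;

    sgt = nvhost_memmgr_sg_table(memmgr, mem.ref);
    if (IS_ERR_OR_NULL(sgt))
            return -ENOMEM;
    pa = sg_phys(sgt->sgl);    /* physical address of the buffer */
    /* ... use pa ... */
    nvhost_memmgr_free_sg_table(memmgr, mem.ref, sgt);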

Implement nvhost_memmgr_smmu_map(), which maps a given sg_table to
the device passed to it, and nvhost_memmgr_smmu_unmap(), which undoes
the mapping.
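
A rough usage sketch, based on the calls this patch adds in
gr_gk20a_init_comptag() and gk20a_remove_gr_support(); dev, sgt and
size stand in for the caller's values:

    #ifdef CONFIG_TEGRA_IOMMU_SMMU
            err = nvhost_memmgr_smmu_map(sgt, size, dev);
            if (err)
                    goto clean_up;
            /* sg_dma_address(sgt->sgl) now holds the SMMU IOVA */
    #endif
            /* ... hand the address to the gMMU ... */
    #ifdef CONFIG_TEGRA_IOMMU_SMMU
            if (sg_dma_address(sgt->sgl))
                    nvhost_memmgr_smmu_unmap(sgt, size, dev);
    #endif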

Use the new functions in gk20a_vm_map() to create an SMMU address for
each buffer. The callers of gk20a_vm_map() then use either the
physical address from sg_phys() or the IOVA from sg_dma_address(),
depending on the size of the buffer.

Because an SMMU virtual address is marked in the gMMU by setting bit
34, some fields needed to be expanded to 64 bits even when LPAE is
turned off.
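
The selection boils down to the gk20a_mm_iova_addr() helper added to
mm_gk20a.h below: fall back to sg_phys(), and use sg_dma_address()
with bit 34 (NV_MC_SMMU_VADDR_TRANSLATION_BIT) set once an SMMU
mapping exists:

    static inline u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
    {
            u64 result = sg_phys(sgl);
    #ifdef CONFIG_TEGRA_IOMMU_SMMU
            if (sg_dma_address(sgl))
                    result = sg_dma_address(sgl) |
                            1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
    #endif
            return result;
    }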

IMB, PDE and PTE cannot use virtual addresses, so they need to be
treated specially.
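
Those paths therefore keep reading the raw physical address; for
example, the PDE base programmed in channel_gk20a_commit_va() becomes:

    addr = sg_phys(c->vm->pdes.sgt->sgl);   /* never the SMMU address */
    addr_lo = u64_lo32(addr) >> 12;
    addr_hi = u64_hi32(addr);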

Bug 1201552

Change-Id: I3b887aafa62076536836ed99fcef46e6d8435030
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/217309

drivers/video/tegra/host/gk20a/channel_gk20a.c
drivers/video/tegra/host/gk20a/channel_gk20a.h
drivers/video/tegra/host/gk20a/fifo_gk20a.c
drivers/video/tegra/host/gk20a/gr_gk20a.c
drivers/video/tegra/host/gk20a/mm_gk20a.c
drivers/video/tegra/host/gk20a/mm_gk20a.h
drivers/video/tegra/host/gk20a/pmu_gk20a.c
drivers/video/tegra/host/nvhost_memmgr.c
drivers/video/tegra/host/nvhost_memmgr.h

drivers/video/tegra/host/gk20a/channel_gk20a.c
index 4027781..786b3c1 100644
@@ -93,6 +93,7 @@ static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
 
 int channel_gk20a_commit_va(struct channel_gk20a *c)
 {
+       phys_addr_t addr;
        u32 addr_lo;
        u32 addr_hi;
        void *inst_ptr;
@@ -103,11 +104,12 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
        if (IS_ERR(inst_ptr))
                return -ENOMEM;
 
-       addr_lo = u64_lo32(c->vm->pdes.phys) >> 12;
-       addr_hi = u64_hi32(c->vm->pdes.phys);
+       addr = sg_phys(c->vm->pdes.sgt->sgl);
+       addr_lo = u64_lo32(addr) >> 12;
+       addr_hi = u64_hi32(addr);
 
        nvhost_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
-                  (u64)c->vm->pdes.phys, addr_lo, addr_hi);
+                  (u64)addr, addr_lo, addr_hi);
 
        mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
                ram_in_page_dir_base_target_vid_mem_f() |
@@ -258,7 +260,8 @@ static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
        struct fifo_engine_info_gk20a *engine_info =
                f->engine_info + ENGINE_GR_GK20A;
 
-       u32 inst_ptr = ch_gk20a->inst_block.cpu_pa >> ram_in_base_shift_v();
+       u32 inst_ptr = sg_phys(ch_gk20a->inst_block.mem.sgt->sgl)
+               >> ram_in_base_shift_v();
 
        nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
                ch_gk20a->hw_chid, inst_ptr);
@@ -314,18 +317,16 @@ static int channel_gk20a_alloc_inst(struct gk20a *g,
        }
 
        ch->inst_block.mem.sgt =
-               nvhost_memmgr_pin(memmgr, ch->inst_block.mem.ref);
-       ch->inst_block.cpu_pa = sg_dma_address(ch->inst_block.mem.sgt->sgl);
+               nvhost_memmgr_sg_table(memmgr, ch->inst_block.mem.ref);
 
        /* IS_ERR throws a warning here (expecting void *) */
-       if (ch->inst_block.cpu_pa == -EINVAL ||
-           ch->inst_block.cpu_pa == -EINTR) {
-               ch->inst_block.cpu_pa = 0;
+       if (IS_ERR(ch->inst_block.mem.sgt)) {
+               ch->inst_block.mem.sgt = NULL;
                goto clean_up;
        }
 
        nvhost_dbg_info("channel %d inst block physical addr: 0x%16llx",
-               ch->hw_chid, (u64)ch->inst_block.cpu_pa);
+               ch->hw_chid, (u64)sg_phys(ch->inst_block.mem.sgt->sgl));
 
        ch->inst_block.mem.size = ram_in_alloc_size_v();
 
@@ -343,8 +344,8 @@ static void channel_gk20a_free_inst(struct gk20a *g,
 {
        struct mem_mgr *memmgr = mem_mgr_from_g(g);
 
-       nvhost_memmgr_unpin(memmgr, ch->inst_block.mem.ref,
-                           ch->inst_block.mem.sgt);
+       nvhost_memmgr_free_sg_table(memmgr, ch->inst_block.mem.ref,
+                       ch->inst_block.mem.sgt);
        nvhost_memmgr_put(memmgr, ch->inst_block.mem.ref);
        memset(&ch->inst_block, 0, sizeof(struct inst_desc));
 }
@@ -582,7 +583,7 @@ static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
        q->base_gva = ch_vm->map(ch_vm, memmgr,
                        q->mem.ref,
                         /*offset_align, flags, kind*/
-                       0, 0, 0);
+                       0, 0, 0, NULL);
        if (!q->base_gva) {
                nvhost_err(d, "ch %d : failed to map gpu va"
                           "for priv cmd buffer", c->hw_chid);
@@ -882,7 +883,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
        c->gpfifo.gpu_va = ch_vm->map(ch_vm, memmgr,
                                c->gpfifo.mem.ref,
                                /*offset_align, flags, kind*/
-                               0, 0, 0);
+                               0, 0, 0, NULL);
        if (!c->gpfifo.gpu_va) {
                nvhost_err(d, "channel %d : failed to map"
                           " gpu_va for gpfifo", c->hw_chid);
drivers/video/tegra/host/gk20a/channel_gk20a.h
index 725d0d9..6ceb8c4 100644
@@ -89,7 +89,7 @@ struct channel_gk20a {
        struct mem_desc_sub ramfc;
 
        void *userd_cpu_va;
-       phys_addr_t userd_cpu_pa;
+       u64 userd_cpu_pa;
        u64 userd_gpu_va;
 
        s32 num_objects;
drivers/video/tegra/host/gk20a/fifo_gk20a.c
index 780606f..b9e4282 100644
@@ -139,20 +139,18 @@ void gk20a_remove_fifo_support(struct fifo_gk20a *f)
        g->mm.bar1.vm.unmap(&g->mm.bar1.vm, f->userd.gpu_va);
 
        nvhost_memmgr_munmap(f->userd.mem.ref, f->userd.cpu_va);
-       nvhost_memmgr_unpin(memmgr, f->userd.mem.ref, f->userd.mem.sgt);
+       nvhost_memmgr_free_sg_table(memmgr, f->userd.mem.ref, f->userd.mem.sgt);
        nvhost_memmgr_put(memmgr, f->userd.mem.ref);
 
        engine_info = f->engine_info + ENGINE_GR_GK20A;
        runlist_id = engine_info->runlist_id;
        runlist = &f->runlist_info[runlist_id];
 
-       if (runlist->cur_buffer != MAX_RUNLIST_BUFFERS)
-               nvhost_memmgr_unpin(memmgr,
-                                   runlist->mem[runlist->cur_buffer].ref,
-                                   runlist->mem[runlist->cur_buffer].sgt);
-
-       for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
+       for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+               nvhost_memmgr_free_sg_table(memmgr, runlist->mem[i].ref,
+                               runlist->mem[i].sgt);
                nvhost_memmgr_put(memmgr, runlist->mem[i].ref);
+       }
 
        kfree(runlist->active_channels);
 
@@ -268,6 +266,7 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 
        runlist_size  = ram_rl_entry_size_v() * f->num_channels;
        for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+               struct sg_table *sgt;
                runlist->mem[i].ref =
                        nvhost_memmgr_alloc(memmgr, runlist_size,
                                            DEFAULT_ALLOC_ALIGNMENT,
@@ -275,6 +274,10 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
                                            0);
                if (!runlist->mem[i].ref)
                        goto clean_up_runlist;
+               sgt = nvhost_memmgr_sg_table(memmgr, runlist->mem[i].ref);
+               if (IS_ERR(sgt))
+                       goto clean_up_runlist;
+               runlist->mem[i].sgt = sgt;
                runlist->mem[i].size = runlist_size;
        }
        mutex_init(&runlist->mutex);
@@ -288,8 +291,13 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
        return 0;
 
 clean_up_runlist:
-       for (i = 0; i < MAX_RUNLIST_BUFFERS; i++)
-               nvhost_memmgr_put(memmgr, runlist->mem[i].ref);
+       for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+               if (runlist->mem[i].sgt)
+                       nvhost_memmgr_free_sg_table(memmgr, runlist->mem[i].ref,
+                                       runlist->mem[i].sgt);
+               if (runlist->mem[i].ref)
+                       nvhost_memmgr_put(memmgr, runlist->mem[i].ref);
+       }
 
        kfree(runlist->active_channels);
        runlist->active_channels = NULL;
@@ -458,24 +466,16 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
                goto clean_up;
        }
 
-       f->userd.mem.sgt = nvhost_memmgr_pin(memmgr, f->userd.mem.ref);
-       f->userd.cpu_pa = sg_dma_address(f->userd.mem.sgt->sgl);
-       nvhost_dbg_info("userd physical address : 0x%08x",
-                  (u32)f->userd.cpu_pa);
-
-       if (f->userd.cpu_pa == -EINVAL ||
-           f->userd.cpu_pa == -EINTR) {
-               f->userd.cpu_pa = 0;
-               err = -ENOMEM;
-               goto clean_up;
-       }
-
        /* bar1 va */
        f->userd.gpu_va = g->mm.bar1.vm.map(&g->mm.bar1.vm,
                                            memmgr,
                                            f->userd.mem.ref,
                                            /*offset_align, flags, kind*/
-                                           4096, 0, 0);
+                                           4096, 0, 0,
+                                           &f->userd.mem.sgt);
+       f->userd.cpu_pa = gk20a_mm_iova_addr(f->userd.mem.sgt->sgl);
+       nvhost_dbg_info("userd physical address : 0x%08llx - 0x%08llx",
+                       f->userd.cpu_pa, f->userd.cpu_pa + f->userd_total_size);
        nvhost_dbg_info("userd bar1 va = 0x%llx", f->userd.gpu_va);
 
        f->userd.mem.size = f->userd_total_size;
@@ -521,7 +521,8 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 clean_up:
        nvhost_dbg_fn("fail");
        nvhost_memmgr_munmap(f->userd.mem.ref, f->userd.cpu_va);
-       nvhost_memmgr_unpin(memmgr, f->userd.mem.ref, f->userd.mem.sgt);
+       if (f->userd.gpu_va)
+               g->mm.bar1.vm.unmap(&g->mm.bar1.vm, f->userd.gpu_va);
        nvhost_memmgr_put(memmgr, f->userd.mem.ref);
        memset(&f->userd, 0, sizeof(struct userd_desc));
 
@@ -591,7 +592,6 @@ static int gk20a_init_fifo_setup_hw(struct gk20a *g)
                        nvhost_err(dev_from_gk20a(g),
                                "cpu didn't see bar1 write @ %p!",
                                cpu_vaddr);
-                       return -EINVAL;
                }
 
                /* put it back */
@@ -656,7 +656,7 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr)
                if (IS_ERR_OR_NULL(c))
                        continue;
                if (c->inst_block.mem.ref &&
-                   (inst_ptr == (u64)(c->inst_block.cpu_pa)))
+                   (inst_ptr == (u64)(sg_phys(c->inst_block.mem.sgt->sgl))))
                        return f->channel+ci;
        }
        return NULL;
@@ -1132,7 +1132,6 @@ int gk20a_fifo_update_runlist(struct gk20a *g,
        u32 engine_id, u32 hw_chid, bool add)
 {
        struct fifo_gk20a *f = &g->fifo;
-       struct mem_mgr *memmgr = mem_mgr_from_g(g);
        struct fifo_runlist_info_gk20a *runlist = NULL;
        u32 runlist_id = ~0;
        u32 *runlist_entry_base = NULL;
@@ -1177,14 +1176,7 @@ int gk20a_fifo_update_runlist(struct gk20a *g,
        nvhost_dbg_info("runlist_id : %d, switch to new buffer %p",
                runlist_id, runlist->mem[new_buf].ref);
 
-       runlist->mem[new_buf].sgt =
-               nvhost_memmgr_pin(memmgr, runlist->mem[new_buf].ref);
-
-       runlist_pa = sg_dma_address(runlist->mem[new_buf].sgt->sgl);
-       if (!runlist_pa) {
-               ret = -ENOMEM;
-               goto clean_up;
-       }
+       runlist_pa = sg_phys(runlist->mem[new_buf].sgt->sgl);
 
        runlist_entry_base = nvhost_memmgr_mmap(runlist->mem[new_buf].ref);
        if (IS_ERR_OR_NULL(runlist_entry_base)) {
@@ -1237,13 +1229,6 @@ int gk20a_fifo_update_runlist(struct gk20a *g,
        runlist->cur_buffer = new_buf;
 
 clean_up:
-       if (ret != 0)
-               nvhost_memmgr_unpin(memmgr, runlist->mem[new_buf].ref,
-                                   runlist->mem[new_buf].sgt);
-       else if (old_buf != -1)
-               nvhost_memmgr_unpin(memmgr, runlist->mem[old_buf].ref,
-                                   runlist->mem[old_buf].sgt);
-
        nvhost_memmgr_munmap(runlist->mem[new_buf].ref,
                             runlist_entry_base);
 done:
drivers/video/tegra/host/gk20a/gr_gk20a.c
index dd0c580..9d78d13 100644
@@ -473,8 +473,8 @@ static int gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_gk20a *c,
 static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
                                        struct channel_gk20a *c)
 {
-       u32 inst_base_ptr =
-               u64_lo32(c->inst_block.cpu_pa) >> ram_in_base_shift_v();
+       u32 inst_base_ptr = u64_lo32(sg_phys(c->inst_block.mem.sgt->sgl)
+                                    >> ram_in_base_shift_v());
        u32 ret;
 
        nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
@@ -1181,7 +1181,8 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
        int ret;
 
        u32 inst_base_ptr =
-               u64_lo32(c->inst_block.cpu_pa) >> ram_in_base_shift_v();
+               u64_lo32(sg_phys(c->inst_block.mem.sgt->sgl))
+               >> ram_in_base_shift_v();
 
        nvhost_dbg_fn("");
 
@@ -1335,7 +1336,8 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
        if (tegra_platform_is_linsim()) {
                u32 inst_base_ptr =
-                       u64_lo32(c->inst_block.cpu_pa) >> ram_in_base_shift_v();
+                       u64_lo32(sg_phys(c->inst_block.mem.sgt->sgl))
+                       >> ram_in_base_shift_v();
 
                ret = gr_gk20a_submit_fecs_method(g, 0, 0, ~0,
                                gr_fecs_current_ctx_ptr_f(inst_base_ptr) |
@@ -1632,7 +1634,8 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 
        gpu_va = ch_vm->map(ch_vm, memmgr, handle_ref,
                            /*offset_align, flags, kind*/
-                           0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0);
+                           0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0,
+                           NULL);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[CIRCULAR_VA] = gpu_va;
@@ -1645,7 +1648,8 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 
        gpu_va = ch_vm->map(ch_vm, memmgr, handle_ref,
                            /*offset_align, flags, kind*/
-                           0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0);
+                           0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0,
+                           NULL);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[ATTRIBUTE_VA] = gpu_va;
@@ -1658,7 +1662,8 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 
        gpu_va = ch_vm->map(ch_vm, memmgr, handle_ref,
                            /*offset_align, flags, kind*/
-                           0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0);
+                           0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0,
+                           NULL);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[PAGEPOOL_VA] = gpu_va;
@@ -1667,7 +1672,7 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
        gpu_va = ch_vm->map(ch_vm, memmgr,
                            gr->global_ctx_buffer[GOLDEN_CTX].ref,
                            /*offset_align, flags, kind*/
-                           0, 0, 0);
+                           0, 0, 0, NULL);
        if (!gpu_va)
                goto clean_up;
        g_bfr_va[GOLDEN_CTX_VA] = gpu_va;
@@ -1731,7 +1736,7 @@ static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
        gr_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
                gr_ctx->mem.ref,
                /*offset_align, flags, kind*/
-               0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0);
+               0, NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 0, NULL);
        if (!gr_ctx->gpu_va) {
                nvhost_memmgr_put(memmgr, gr_ctx->mem.ref);
                return -ENOMEM;
@@ -1771,7 +1776,7 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
        patch_ctx->gpu_va = ch_vm->map(ch_vm, memmgr,
                                patch_ctx->mem.ref,
                                /*offset_align, flags, kind*/
-                               0, 0, 0);
+                               0, 0, 0, NULL);
        if (!patch_ctx->gpu_va)
                goto clean_up;
 
@@ -1981,12 +1986,18 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
 
        gr_gk20a_free_global_ctx_buffers(g);
 
-       nvhost_memmgr_unpin(memmgr, gr->mmu_wr_mem.mem.ref,
-                           gr->mmu_wr_mem.mem.sgt);
-       nvhost_memmgr_unpin(memmgr, gr->mmu_rd_mem.mem.ref,
-                           gr->mmu_rd_mem.mem.sgt);
-       nvhost_memmgr_unpin(memmgr, gr->compbit_store.mem.ref,
-                           gr->compbit_store.mem.sgt);
+       nvhost_memmgr_free_sg_table(memmgr, gr->mmu_wr_mem.mem.ref,
+                       gr->mmu_wr_mem.mem.sgt);
+       nvhost_memmgr_free_sg_table(memmgr, gr->mmu_rd_mem.mem.ref,
+                       gr->mmu_rd_mem.mem.sgt);
+#ifdef CONFIG_TEGRA_IOMMU_SMMU
+       if (sg_dma_address(gr->compbit_store.mem.sgt->sgl))
+               nvhost_memmgr_smmu_unmap(gr->compbit_store.mem.sgt,
+                               gr->compbit_store.mem.size,
+                               dev_from_gk20a(g));
+#endif
+       nvhost_memmgr_free_sg_table(memmgr, gr->compbit_store.mem.ref,
+                       gr->compbit_store.mem.sgt);
        nvhost_memmgr_put(memmgr, gr->mmu_wr_mem.mem.ref);
        nvhost_memmgr_put(memmgr, gr->mmu_rd_mem.mem.ref);
        nvhost_memmgr_put(memmgr, gr->compbit_store.mem.ref);
@@ -2249,16 +2260,14 @@ static int gr_gk20a_init_mmu_sw(struct gk20a *g, struct gr_gk20a *gr)
        nvhost_memmgr_munmap(gr->mmu_rd_mem.mem.ref, mmu_ptr);
 
        gr->mmu_wr_mem.mem.sgt =
-               nvhost_memmgr_pin(memmgr, gr->mmu_wr_mem.mem.ref);
+               nvhost_memmgr_sg_table(memmgr, gr->mmu_wr_mem.mem.ref);
        if (IS_ERR_OR_NULL(gr->mmu_wr_mem.mem.sgt))
                goto clean_up;
-       gr->mmu_wr_mem.cpu_pa = sg_dma_address(gr->mmu_wr_mem.mem.sgt->sgl);
 
        gr->mmu_rd_mem.mem.sgt =
-               nvhost_memmgr_pin(memmgr, gr->mmu_rd_mem.mem.ref);
+               nvhost_memmgr_sg_table(memmgr, gr->mmu_rd_mem.mem.ref);
        if (IS_ERR_OR_NULL(gr->mmu_rd_mem.mem.sgt))
                goto clean_up;
-       gr->mmu_rd_mem.cpu_pa = sg_dma_address(gr->mmu_rd_mem.mem.sgt->sgl);
        return 0;
 
 clean_up:
@@ -2516,13 +2525,19 @@ static int gr_gk20a_init_comptag(struct gk20a *g, struct gr_gk20a *gr)
        gr->compbit_store.mem.size = compbit_backing_size;
 
        gr->compbit_store.mem.sgt =
-               nvhost_memmgr_pin(memmgr, gr->compbit_store.mem.ref);
+               nvhost_memmgr_sg_table(memmgr, gr->compbit_store.mem.ref);
        if (IS_ERR_OR_NULL(gr->compbit_store.mem.sgt)) {
                ret = -ENOMEM;
                goto clean_up;
        }
+#ifdef CONFIG_TEGRA_IOMMU_SMMU
+       ret = nvhost_memmgr_smmu_map(gr->compbit_store.mem.sgt,
+                       compbit_backing_size, dev_from_gk20a(g));
+       if (ret)
+               goto clean_up;
+#endif
        gr->compbit_store.base_pa =
-               sg_dma_address(gr->compbit_store.mem.sgt->sgl);
+               gk20a_mm_iova_addr(gr->compbit_store.mem.sgt->sgl);
 
        nvhost_allocator_init(&gr->comp_tags, "comptag",
                        1, max_comptag_lines, 1);
@@ -2530,6 +2545,9 @@ static int gr_gk20a_init_comptag(struct gk20a *g, struct gr_gk20a *gr)
        return 0;
 
 clean_up:
+       if (gr->compbit_store.mem.sgt)
+               nvhost_memmgr_free_sg_table(memmgr, gr->compbit_store.mem.ref,
+                               gr->compbit_store.mem.sgt);
        nvhost_memmgr_put(memmgr, gr->compbit_store.mem.ref);
        return ret;
 }
@@ -3329,8 +3347,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
        gr_gk20a_slcg_perf_load_gating_prod(g, true);
 
        /* init mmu debug buffer */
-       addr_lo = u64_lo32(gr->mmu_wr_mem.cpu_pa);
-       addr_hi = u64_hi32(gr->mmu_wr_mem.cpu_pa);
+       addr_lo = u64_lo32(sg_phys(gr->mmu_wr_mem.mem.sgt->sgl));
+       addr_hi = u64_hi32(sg_phys(gr->mmu_wr_mem.mem.sgt->sgl));
        addr = (addr_lo >> fb_mmu_debug_wr_addr_alignment_v()) |
                (addr_hi << (32 - fb_mmu_debug_wr_addr_alignment_v()));
 
@@ -3339,8 +3357,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
                     fb_mmu_debug_wr_vol_false_f() |
                     fb_mmu_debug_wr_addr_v(addr));
 
-       addr_lo = u64_lo32(gr->mmu_rd_mem.cpu_pa);
-       addr_hi = u64_hi32(gr->mmu_rd_mem.cpu_pa);
+       addr_lo = u64_lo32(sg_phys(gr->mmu_rd_mem.mem.sgt->sgl));
+       addr_hi = u64_hi32(sg_phys(gr->mmu_rd_mem.mem.sgt->sgl));
        addr = (addr_lo >> fb_mmu_debug_rd_addr_alignment_v()) |
                (addr_hi << (32 - fb_mmu_debug_rd_addr_alignment_v()));
 
@@ -3446,19 +3464,12 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 
        gr_gk20a_init_zbc(g, gr);
 
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
        {
                u64 compbit_base_post_divide64 = (gr->compbit_store.base_pa >>
                                ltc_ltc0_lts0_cbc_base_alignment_shift_v());
                do_div(compbit_base_post_divide64, gr->num_fbps);
                compbit_base_post_divide = u64_lo32(compbit_base_post_divide64);
        }
-#else
-       compbit_base_post_divide = u64_lo32(
-               (gr->compbit_store.base_pa >>
-                       ltc_ltc0_lts0_cbc_base_alignment_shift_v()) /
-                       gr->num_fbps);
-#endif
 
        compbit_base_post_multiply = ((u64)compbit_base_post_divide *
                gr->num_fbps) << ltc_ltc0_lts0_cbc_base_alignment_shift_v();
@@ -4044,7 +4055,7 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx)
 
        /* slow path */
        for (chid = 0; chid < f->num_channels; chid++)
-               if (f->channel[chid].inst_block.cpu_pa ==
+               if (sg_phys(f->channel[chid].inst_block.mem.sgt->sgl) ==
                    curr_ctx << ram_in_base_shift_v())
                        break;
 
drivers/video/tegra/host/gk20a/mm_gk20a.c
index 64534cb..a8f9c28 100644
 #include "kind_gk20a.h"
 
 
+#define FLUSH_CPU_DCACHE(va, pa, size) \
+       do {    \
+               __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
+               outer_flush_range(pa, pa + (size_t)(size));             \
+       } while (0)
+
+enum gmmu_page_smmu_type {
+       gmmu_page_smmu_type_physical,
+       gmmu_page_smmu_type_virtual,
+};
+
 static void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm);
 /* we support 2 page sizes, define indexing based upon that */
 static inline int gmmu_page_size_idx(u32 ps)
@@ -52,6 +63,10 @@ static inline int gmmu_page_size_idx(u32 ps)
        return (ps) != 4096; /* 4K:=0, all else := 1 */
 }
 static const u32 gmmu_page_sizes[2] = { 0x1000, 0x20000 }; /* 4KB and 128KB */
+static const u32 gmmu_page_smmu_types[2] = {
+       gmmu_page_smmu_type_physical,
+       gmmu_page_smmu_type_virtual,
+};
 static const u32 gmmu_page_shift[2] = { 12, 17 };
 static const u64 gmmu_page_offset_mask[2] = { 0xfffLL, 0x1ffffLL };
 static const u64 gmmu_page_mask[2] = { ~0xfffLL, ~0x1ffffLL };
@@ -90,7 +105,8 @@ void gk20a_remove_mm_support(struct mm_gk20a *mm)
 
        nvhost_dbg_fn("");
 
-       nvhost_memmgr_unpin(memmgr, inst_block->mem.ref, inst_block->mem.sgt);
+       nvhost_memmgr_free_sg_table(memmgr, inst_block->mem.ref,
+                       inst_block->mem.sgt);
        nvhost_memmgr_put(memmgr, inst_block->mem.ref);
 
        vm->remove_support(vm);
@@ -155,7 +171,7 @@ static int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
        struct mm_gk20a *mm = &g->mm;
        struct inst_desc *inst_block = &mm->bar1.inst_block;
-       phys_addr_t inst_pa = inst_block->cpu_pa;
+       phys_addr_t inst_pa = sg_phys(inst_block->mem.sgt->sgl);
 
        nvhost_dbg_fn("");
 
@@ -202,16 +218,77 @@ int gk20a_init_mm_support(struct gk20a *g)
        return err;
 }
 
+#ifdef CONFIG_TEGRA_IOMMU_SMMU
 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
-                           phys_addr_t *pa, void **handle,
+                           void **handle,
+                           struct sg_table **sgt)
+{
+       u32 num_pages = 1 << order;
+       u32 len = num_pages * PAGE_SIZE;
+       int err;
+       struct page *pages;
+
+       nvhost_dbg_fn("");
+
+       pages = alloc_pages(GFP_KERNEL, order);
+       if (!pages) {
+               nvhost_dbg(dbg_pte, "alloc_pages failed\n");
+               goto err_out;
+       }
+       *sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+       if (!sgt) {
+               nvhost_dbg(dbg_pte, "cannot allocate sg table");
+               goto err_alloced;
+       }
+       err =  sg_alloc_table_from_pages(*sgt, &pages, 1, 0, len, GFP_KERNEL);
+       if (err) {
+               nvhost_dbg(dbg_pte, "sg_alloc_table failed\n");
+               goto err_sg_table;
+       }
+       *handle = page_address(pages);
+       memset(*handle, 0, len);
+
+       return 0;
+
+err_sg_table:
+       kfree(*sgt);
+err_alloced:
+       __free_pages(pages, order);
+err_out:
+       return -ENOMEM;
+}
+
+static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
+                           struct sg_table *sgt, u32 order)
+{
+       nvhost_dbg_fn("");
+       BUG_ON(sgt == NULL);
+       free_pages((unsigned long)handle, order);
+       sg_free_table(sgt);
+       kfree(sgt);
+}
+
+static int map_gmmu_pages(void *handle, struct sg_table *sgt, void **va)
+{
+       FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
+       *va = handle;
+       return 0;
+}
+
+static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, u32 *va)
+{
+       FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
+}
+#else
+static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
+                           void **handle,
                            struct sg_table **sgt)
 {
        struct mem_mgr *client = mem_mgr_from_vm(vm);
        struct mem_handle *r;
-       phys_addr_t phys;
        u32 num_pages = 1 << order;
        u32 len = num_pages * PAGE_SIZE;
-       u32 *va;
+       void *va;
 
        nvhost_dbg_fn("");
 
@@ -228,37 +305,36 @@ static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
                nvhost_dbg(dbg_pte, "nvmap_mmap failed\n");
                goto err_alloced;
        }
-       *sgt =  nvhost_memmgr_pin(client, r);
-       if (IS_ERR_OR_NULL(*sgt)) {
-               nvhost_dbg(dbg_pte, "nvmap_pin failed\n");
-               goto err_alloced;
+       *sgt = nvhost_memmgr_sg_table(client, r);
+       if (!*sgt) {
+               nvhost_dbg(dbg_pte, "cannot allocate sg table");
+               goto err_mmaped;
        }
-       phys = sg_dma_address((*sgt)->sgl);
        memset(va, 0, len);
        nvhost_memmgr_munmap(r, va);
-       *pa = phys;
        *handle = (void *)r;
 
        return 0;
 
+err_mmaped:
+       nvhost_memmgr_munmap(r, va);
 err_alloced:
        nvhost_memmgr_put(client, r);
 err_out:
        return -ENOMEM;
 }
 
-
 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
                            struct sg_table *sgt, u32 order)
 {
        struct mem_mgr *client = mem_mgr_from_vm(vm);
        nvhost_dbg_fn("");
        BUG_ON(sgt == NULL);
-       nvhost_memmgr_unpin(client, handle, sgt);
+       nvhost_memmgr_free_sg_table(client, handle, sgt);
        nvhost_memmgr_put(client, handle);
 }
 
-static int map_gmmu_pages(void *handle, void **va)
+static int map_gmmu_pages(void *handle, struct sg_table *sgt, void **va)
 {
        struct mem_handle *r = handle;
        u32 *tmp_va;
@@ -276,15 +352,16 @@ err_out:
        return -ENOMEM;
 }
 
-static void unmap_gmmu_pages(void *handle, u32 *va)
+static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, u32 *va)
 {
        struct mem_handle *r = handle;
        nvhost_dbg_fn("");
        nvhost_memmgr_munmap(r, va);
 }
+#endif
 
 static int update_gmmu_ptes(struct vm_gk20a *vm, u32 page_size_idx,
-                       u64 bfr_addr, u64 first_vaddr, u64 last_vaddr,
+                       struct sg_table *sgt, u64 first_vaddr, u64 last_vaddr,
                        u8 kind_v, u32 ctag_offset, bool cachable);
 
 /* allocate a phys contig region big enough for a full
@@ -294,12 +371,10 @@ static int update_gmmu_ptes(struct vm_gk20a *vm, u32 page_size_idx,
 
 static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
                                       u32 gmmu_page_size,
-                                      struct page_table_gk20a *pte,
-                                      phys_addr_t *pa)
+                                      struct page_table_gk20a *pte)
 {
        int err, page_size_idx;
        u32 pte_order;
-       phys_addr_t phys;
        void *handle;
        struct sg_table *sgt;
 
@@ -309,13 +384,13 @@ static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
        page_size_idx = gmmu_page_size_idx(gmmu_page_size);
        pte_order = vm->mm->page_table_sizing[page_size_idx].order;
 
-       err = alloc_gmmu_pages(vm, pte_order, &phys, &handle, &sgt);
+       err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt);
        if (err)
                return err;
 
-       nvhost_dbg(dbg_pte, "pte = 0x%p, phys = 0x%x", pte, (unsigned int)phys);
+       nvhost_dbg(dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
+                       pte, (u64)sg_phys(sgt->sgl), pte_order);
 
-       *pa = phys;
        pte->ref = handle;
        pte->sgt = sgt;
        pte->page_size_idx = page_size_idx;
@@ -379,9 +454,9 @@ static int validate_gmmu_page_table_gk20a(struct vm_gk20a *vm,
                                          u32 i,
                                          u32 gmmu_page_size)
 {
-       u32 err;
-       phys_addr_t pte_addr;
-       phys_addr_t dbg_addr;
+       int err;
+       u64 pte_addr;
+       u64 dbg_addr;
        struct page_table_gk20a *pte = vm->pdes.ptes + i;
        u32 *pde;
 
@@ -397,10 +472,11 @@ static int validate_gmmu_page_table_gk20a(struct vm_gk20a *vm,
        if (pte->ref)
                return 0;
 
-       err = zalloc_gmmu_page_table_gk20a(vm, gmmu_page_size, pte, &pte_addr);
+       err = zalloc_gmmu_page_table_gk20a(vm, gmmu_page_size, pte);
        if (err)
                return err;
 
+       pte_addr = sg_phys(pte->sgt->sgl);
        dbg_addr = (u32)pte_addr;
        pte_addr >>= gmmu_pde_address_shift_v();
        pde = pde_from_index(vm, i);
@@ -564,7 +640,7 @@ static u32 nvmap_bfr_param[] = {
 #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
 
 struct buffer_attrs {
-       phys_addr_t addr;
+       struct sg_table *sgt;
        u64 size;
        u64 align;
        u32 ctag_offset;
@@ -691,21 +767,21 @@ static int setup_buffer_kind_and_compression(u32 flags,
        return 0;
 }
 
-
 static u64 gk20a_vm_map(struct vm_gk20a *vm,
                        struct mem_mgr *memmgr,
                        struct mem_handle *r,
                        u64 offset_align,
                        u32 flags /*NVHOST_MAP_BUFFER_FLAGS_*/,
-                       u32 kind)
+                       u32 kind,
+                       struct sg_table **sgt)
 {
        struct gk20a *g = gk20a_from_vm(vm);
        struct nvhost_allocator *ctag_allocator = &g->gr.comp_tags;
        struct device *d = &g->dev->dev;
        struct mapped_buffer_node *mapped_buffer = 0;
-       struct sg_table *sgt;
        bool inserted = false, va_allocated = false;
        u32 gmmu_page_size = 0;
+       int gmmu_page_smmu_type = 0;
        u64 map_offset = 0;
        int attr, err = 0;
        struct buffer_attrs bfr = {0};
@@ -744,6 +820,7 @@ static u64 gk20a_vm_map(struct vm_gk20a *vm,
        }
 
        gmmu_page_size = gmmu_page_sizes[bfr.page_size_idx];
+       gmmu_page_smmu_type = gmmu_page_smmu_types[bfr.page_size_idx];
 
        err = setup_buffer_kind_and_compression(flags, kind,
                                                &bfr, gmmu_page_size);
@@ -775,14 +852,6 @@ static u64 gk20a_vm_map(struct vm_gk20a *vm,
                                        bfr.ctag_offset + bfr.ctag_lines - 1);
 
 
-       /* TBD: need to get nvmap to assign the correct asid */
-       /* until then there's no point even trying */
-       if (bfr.iovmm_mapped) {
-               nvhost_err(d, "iovmm remapping is unsupported at this time");
-               err = -EINVAL;
-               goto clean_up;
-       }
-
        /* Allocate (or validate when map_offset != 0) the virtual address. */
        if (!map_offset) {
                map_offset = vm->alloc_va(vm, bfr.size,
@@ -801,20 +870,33 @@ static u64 gk20a_vm_map(struct vm_gk20a *vm,
        }
 
        /* pin buffer to get phys/iovmm addr */
-       sgt = nvhost_memmgr_pin(memmgr, r);
-       if (IS_ERR_OR_NULL(sgt)) {
+       bfr.sgt = nvhost_memmgr_sg_table(memmgr, r);
+       if (IS_ERR_OR_NULL(bfr.sgt)) {
                nvhost_warn(d, "oom allocating tracking buffer");
                goto clean_up;
        }
-       bfr.addr = sg_dma_address(sgt->sgl);
-
-       nvhost_dbg_info("nvmap pinned buffer @ 0x%llx", (u64)bfr.addr);
-       nvhost_dbg_fn("r=%p, map_offset=0x%llx, contig=%d "
+       if (sgt)
+               *sgt = bfr.sgt;
+#ifdef CONFIG_TEGRA_IOMMU_SMMU
+       if (gmmu_page_smmu_type == gmmu_page_smmu_type_virtual) {
+               int err = nvhost_memmgr_smmu_map(bfr.sgt,
+                               bfr.size, d);
+               if (err) {
+                       nvhost_dbg(dbg_err, "Could not map to SMMU");
+                       goto clean_up;
+               }
+               nvhost_dbg(dbg_pte, "Mapped to SMMU, address %08x",
+                               sg_dma_address(bfr.sgt->sgl));
+       }
+#endif
+       nvhost_dbg_fn("r=%p, map_offset=0x%llx, contig=%d page_size=%d "
                      "iovmm_mapped=%d kind=0x%x kind_uc=0x%x flags=0x%x",
-                     r, map_offset, bfr.contig, bfr.iovmm_mapped,
+                     r, map_offset, bfr.contig, gmmu_page_size,
+                     bfr.iovmm_mapped,
                      bfr.kind_v, bfr.uc_kind_v, flags);
        nvhost_dbg_info("comptag size=%d start=%d for 0x%llx",
-                       bfr.ctag_lines, bfr.ctag_offset, (u64)bfr.addr);
+                       bfr.ctag_lines, bfr.ctag_offset,
+                       (u64)sg_phys(bfr.sgt->sgl));
 
        /* keep track of the buffer for unmapping */
        /* TBD: check for multiple mapping of same buffer */
@@ -825,7 +907,7 @@ static u64 gk20a_vm_map(struct vm_gk20a *vm,
        }
        mapped_buffer->memmgr     = memmgr;
        mapped_buffer->handle_ref = r;
-       mapped_buffer->sgt        = sgt;
+       mapped_buffer->sgt        = bfr.sgt;
        mapped_buffer->addr       = map_offset;
        mapped_buffer->size       = bfr.size;
        mapped_buffer->page_size  = gmmu_page_size;
@@ -842,7 +924,7 @@ static u64 gk20a_vm_map(struct vm_gk20a *vm,
        nvhost_dbg_info("allocated va @ 0x%llx", map_offset);
 
        err = update_gmmu_ptes(vm, bfr.page_size_idx,
-                              bfr.addr,
+                              bfr.sgt,
                               map_offset, map_offset + bfr.size - 1,
                               bfr.kind_v,
                               bfr.ctag_offset,
@@ -870,30 +952,33 @@ clean_up:
 }
 
 static int update_gmmu_ptes(struct vm_gk20a *vm, u32 page_size_idx,
-                      u64 bfr_addr, u64 first_vaddr, u64 last_vaddr,
+                      struct sg_table *sgt, u64 first_vaddr, u64 last_vaddr,
                       u8 kind_v, u32 ctag_offset, bool cachable)
 {
        int err;
-       u32 pde_lo, pde_hi, pde_i, cur_page;
+       u32 pde_lo, pde_hi, pde_i;
+       struct scatterlist *cur_chunk;
+       unsigned int cur_offset;
        u32 pte_w[2] = {0, 0}; /* invalid pte */
        u32 ctag = ctag_offset;
        u32 ctag_ptes, ctag_pte_cnt;
+       u32 page_shift;
 
        pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
                                   &pde_lo, &pde_hi);
 
        nvhost_dbg(dbg_pte, "pde_lo=%d, pde_hi=%d", pde_lo, pde_hi);
 
-       if (gmmu_page_sizes[page_size_idx] == 4096) {
-               cur_page = (u32)(bfr_addr >> 12);
-               ctag_ptes = COMP_TAG_LINE_SIZE >> 12;
-       } else {
-               cur_page = (u32)(bfr_addr >> 17);
-               ctag_ptes = COMP_TAG_LINE_SIZE >> 17;
-       }
+       page_shift = gmmu_page_shift[page_size_idx];
+       ctag_ptes = COMP_TAG_LINE_SIZE >> page_shift;
 
-       ctag_pte_cnt = 0;
+       if (sgt)
+               cur_chunk = sgt->sgl;
+       else
+               cur_chunk = NULL;
 
+       cur_offset = 0;
+       ctag_pte_cnt = 0;
        for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
                u32 pte_lo, pte_hi;
                u32 pte_cur;
@@ -919,7 +1004,7 @@ static int update_gmmu_ptes(struct vm_gk20a *vm, u32 page_size_idx,
                /* need to worry about crossing pages when accessing the ptes */
                pte_space_page_offset_from_index(pte_lo, &pte_space_page_cur,
                                                 &pte_space_offset_cur);
-               err = map_gmmu_pages(pte->ref, &pte_kv_cur);
+               err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur);
                if (err) {
                        nvhost_err(dev_from_vm(vm),
                                   "couldn't map ptes for update");
@@ -937,23 +1022,33 @@ static int update_gmmu_ptes(struct vm_gk20a *vm, u32 page_size_idx,
                                }
                        }
 
-                       nvhost_dbg(dbg_pte, "pte_cur=%d cur_page=0x%x kind=%d ctag=%d",
-                               pte_cur, cur_page, kind_v, ctag);
+                       if (likely(sgt)) {
+                               u64 addr = gk20a_mm_iova_addr(cur_chunk);
+                               addr += cur_offset;
+                               nvhost_dbg(dbg_pte, "pte_cur=%d cur_page=0x%08llx kind=%d ctag=%d",
+                                          pte_cur, addr, kind_v, ctag);
 
-                       if (likely(bfr_addr != 0)) {
+                               addr >>= page_shift;
                                pte_w[0] = gmmu_pte_valid_true_f() |
-                                       gmmu_pte_address_sys_f(cur_page);
+                                       gmmu_pte_address_sys_f(addr);
                                pte_w[1] = gmmu_pte_aperture_video_memory_f() |
                                        gmmu_pte_kind_f(kind_v) |
                                        gmmu_pte_comptagline_f(ctag);
                                if (!cachable)
                                        pte_w[1] |= gmmu_pte_vol_true_f();
-                               cur_page++;
+
+                               cur_offset += 1 << page_shift;
+                               if (cur_offset >= cur_chunk->length) {
+                                       cur_chunk = sg_next(cur_chunk);
+                                       cur_offset = 0;
+                               }
                                pte->ref_cnt++;
-                       } else
+                       } else {
                                pte->ref_cnt--;
+                       }
 
-                       nvhost_dbg(dbg_pte, "pte[0]=0x%x pte[1]=0x%x", pte_w[0], pte_w[1]);
+                       nvhost_dbg(dbg_pte, "vm %p, pte[0]=0x%x pte[1]=0x%x, ref_cnt=%d",
+                                       vm, pte_w[0], pte_w[1], pte->ref_cnt);
                        mem_wr32(pte_kv_cur + pte_space_page_offset*8, 0,
                                 pte_w[0]);
                        mem_wr32(pte_kv_cur + pte_space_page_offset*8, 1,
@@ -962,10 +1057,11 @@ static int update_gmmu_ptes(struct vm_gk20a *vm, u32 page_size_idx,
 
                __cpuc_flush_dcache_area(pte_kv_cur, PAGE_SIZE);
 
-               unmap_gmmu_pages(pte->ref, pte_kv_cur);
+               unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
 
                if (pte->ref_cnt == 0) {
                        /* invalidate pde */
+                       nvhost_dbg(dbg_pte, "free vm %p, pde=%d", vm, pde_i);
                        pde = pde_from_index(vm, pde_i);
                        mem_wr32(pde, 0, gmmu_pde_aperture_big_invalid_f());
                        mem_wr32(pde, 1, gmmu_pde_aperture_small_invalid_f());
@@ -974,7 +1070,9 @@ static int update_gmmu_ptes(struct vm_gk20a *vm, u32 page_size_idx,
                        free_gmmu_pages(vm, pte->ref, pte->sgt,
                                        vm->mm->page_table_sizing[page_size_idx].order);
                        pte->ref = NULL;
+                       nvhost_dbg(dbg_pte, "free pde %d", pde_i);
                }
+
        }
 
        smp_mb();
@@ -1029,9 +1127,17 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
                nvhost_dbg(dbg_err, "failed to update ptes on unmap");
        }
 
-       nvhost_memmgr_unpin(mapped_buffer->memmgr,
-                           mapped_buffer->handle_ref,
-                           mapped_buffer->sgt);
+#ifdef CONFIG_TEGRA_IOMMU_SMMU
+       if (sg_dma_address(mapped_buffer->sgt->sgl)) {
+               nvhost_dbg(dbg_pte, "unmap from SMMU addr %08x",
+                          sg_dma_address(mapped_buffer->sgt->sgl));
+               nvhost_memmgr_smmu_unmap(mapped_buffer->sgt,
+                                        mapped_buffer->size,
+                                        dev_from_vm(vm));
+       }
+#endif
+       nvhost_memmgr_free_sg_table(mapped_buffer->memmgr,
+                       mapped_buffer->handle_ref, mapped_buffer->sgt);
 
        /* remove from mapped buffer tree, free */
        rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
@@ -1073,7 +1179,7 @@ void gk20a_vm_remove_support(struct vm_gk20a *vm)
                node = rb_first(&vm->mapped_buffers);
        }
 
-       unmap_gmmu_pages(vm->pdes.ref, vm->pdes.kv);
+       unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
        free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
        kfree(vm->pdes.ptes);
        nvhost_allocator_destroy(&vm->vma);
@@ -1120,18 +1226,18 @@ static int gk20a_as_alloc_share(struct nvhost_as_share *as_share)
                   vm->va_limit, vm->pdes.num_pdes);
 
        /* allocate the page table directory */
-       err = alloc_gmmu_pages(vm, 0, &vm->pdes.phys, &vm->pdes.ref,
+       err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
                               &vm->pdes.sgt);
        if (err) {
                return -ENOMEM;
        }
-       err = map_gmmu_pages(vm->pdes.ref, &vm->pdes.kv);
+       err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
        if (err) {
                free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
                return -ENOMEM;
        }
        nvhost_dbg(dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
-                       vm->pdes.kv, (u64)vm->pdes.phys);
+                       vm->pdes.kv, (u64)sg_phys(vm->pdes.sgt->sgl));
        /* we could release vm->pdes.kv but it's only one page... */
 
        /* alloc in 4K granularity */
@@ -1219,7 +1325,8 @@ static int gk20a_as_map_buffer(struct nvhost_as_share *as_share,
 
        nvhost_dbg_fn("");
 
-       ret_va = vm->map(vm, nvmap, r, *offset_align, flags, 0/*no kind here, to be removed*/);
+       ret_va = vm->map(vm, nvmap, r, *offset_align,
+                       flags, 0/*no kind here, to be removed*/, NULL);
        *offset_align = ret_va;
        if (!ret_va)
                err = -EINVAL;
@@ -1284,6 +1391,7 @@ int gk20a_init_bar1_vm(struct mm_gk20a *mm)
        void *inst_ptr;
        struct vm_gk20a *vm = &mm->bar1.vm;
        struct inst_desc *inst_block = &mm->bar1.inst_block;
+       phys_addr_t pde_addr;
        u32 pde_addr_lo;
        u32 pde_addr_hi;
 
@@ -1314,25 +1422,26 @@ int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 
 
        /* allocate the page table directory */
-       err = alloc_gmmu_pages(vm, 0, &vm->pdes.phys, &vm->pdes.ref,
+       err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
                               &vm->pdes.sgt);
        if (err)
                goto clean_up;
 
-       err = map_gmmu_pages(vm->pdes.ref, &vm->pdes.kv);
+       err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
        if (err) {
                free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
                goto clean_up;
        }
        nvhost_dbg(dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
-                       vm->pdes.kv, (u64)vm->pdes.phys);
+                       vm->pdes.kv, (u64)sg_phys(vm->pdes.sgt->sgl));
        /* we could release vm->pdes.kv but it's only one page... */
 
-       pde_addr_lo = u64_lo32(vm->pdes.phys) >> 12;
-       pde_addr_hi = u64_hi32(vm->pdes.phys);
+       pde_addr = sg_phys(vm->pdes.sgt->sgl);
+       pde_addr_lo = u64_lo32(pde_addr) >> 12;
+       pde_addr_hi = u64_hi32(pde_addr);
 
        nvhost_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
-               (u64)vm->pdes.phys, pde_addr_lo, pde_addr_hi);
+               (u64)sg_phys(vm->pdes.sgt->sgl), pde_addr_lo, pde_addr_hi);
 
        /* allocate instance mem for bar1 */
        inst_block->mem.size = ram_in_alloc_size_v();
@@ -1348,14 +1457,15 @@ int gk20a_init_bar1_vm(struct mm_gk20a *mm)
                goto clean_up;
        }
 
-       inst_block->mem.sgt = nvhost_memmgr_pin(nvmap, inst_block->mem.ref);
+       inst_block->mem.sgt = nvhost_memmgr_sg_table(nvmap,
+                       inst_block->mem.ref);
        /* IS_ERR throws a warning here (expecting void *) */
        if (IS_ERR_OR_NULL(inst_block->mem.sgt)) {
                inst_pa = 0;
                err = (int)inst_pa;
                goto clean_up;
        }
-       inst_block->cpu_pa = inst_pa = sg_dma_address(inst_block->mem.sgt->sgl);
+       inst_pa = sg_phys(inst_block->mem.sgt->sgl);
 
        inst_ptr = nvhost_memmgr_mmap(inst_block->mem.ref);
        if (IS_ERR(inst_ptr)) {
@@ -1384,7 +1494,7 @@ int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 
        nvhost_memmgr_munmap(inst_block->mem.ref, inst_ptr);
 
-       nvhost_dbg_info("bar1 inst block ptr: %08x",  (u32)inst_pa);
+       nvhost_dbg_info("bar1 inst block ptr: %08llx",  (u64)inst_pa);
        nvhost_allocator_init(&vm->vma, "gk20a_bar1",
                1, (vm->va_limit >> 12) - 1, 1);
 
@@ -1414,6 +1524,7 @@ int gk20a_init_pmu_vm(struct mm_gk20a *mm)
        void *inst_ptr;
        struct vm_gk20a *vm = &mm->pmu.vm;
        struct inst_desc *inst_block = &mm->pmu.inst_block;
+       u64 pde_addr;
        u32 pde_addr_lo;
        u32 pde_addr_hi;
 
@@ -1443,24 +1554,26 @@ int gk20a_init_pmu_vm(struct mm_gk20a *mm)
                   vm->va_limit, vm->pdes.num_pdes);
 
        /* allocate the page table directory */
-       err = alloc_gmmu_pages(vm, 0, &vm->pdes.phys, &vm->pdes.ref,
+       err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
                               &vm->pdes.sgt);
        if (err)
                goto clean_up;
 
-       err = map_gmmu_pages(vm->pdes.ref, &vm->pdes.kv);
+       err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
        if (err) {
                free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
                goto clean_up;
        }
-       nvhost_dbg_info("pmu pdes phys @ 0x%llx", (u64)vm->pdes.phys);
+       nvhost_dbg_info("pmu pdes phys @ 0x%llx",
+                       (u64)sg_phys(vm->pdes.sgt->sgl));
        /* we could release vm->pdes.kv but it's only one page... */
 
-       pde_addr_lo = u64_lo32(vm->pdes.phys) >> 12;
-       pde_addr_hi = u64_hi32(vm->pdes.phys);
+       pde_addr = sg_phys(vm->pdes.sgt->sgl);
+       pde_addr_lo = u64_lo32(pde_addr) >> 12;
+       pde_addr_hi = u64_hi32(pde_addr);
 
        nvhost_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
-                       (u64)vm->pdes.phys, pde_addr_lo, pde_addr_hi);
+                       (u64)pde_addr, pde_addr_lo, pde_addr_hi);
 
        /* allocate instance mem for pmu */
        inst_block->mem.size = GK20A_PMU_INST_SIZE;
@@ -1476,15 +1589,15 @@ int gk20a_init_pmu_vm(struct mm_gk20a *mm)
                goto clean_up;
        }
 
-       inst_block->mem.sgt = nvhost_memmgr_pin(nvmap, inst_block->mem.ref);
+       inst_block->mem.sgt = nvhost_memmgr_sg_table(nvmap,
+                       inst_block->mem.ref);
        /* IS_ERR throws a warning here (expecting void *) */
        if (IS_ERR_OR_NULL(inst_block->mem.sgt)) {
                inst_pa = 0;
                err = (int)((uintptr_t)inst_block->mem.sgt);
                goto clean_up;
        }
-       inst_block->cpu_pa = inst_pa =
-               sg_dma_address(inst_block->mem.sgt->sgl);
+       inst_pa = sg_phys(inst_block->mem.sgt->sgl);
 
        nvhost_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
 
@@ -1626,7 +1739,7 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
 static void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 {
        struct gk20a *g = gk20a_from_vm(vm);
-       u32 addr_lo = u64_lo32(vm->pdes.phys) >> 12;
+       u32 addr_lo = u64_lo32(sg_phys(vm->pdes.sgt->sgl)) >> 12;
        u32 data;
        s32 retry = 200;
 
@@ -1707,7 +1820,7 @@ void gk20a_mm_dump_vm(struct vm_gk20a *vm,
                nvhost_err(dev_from_vm(vm),
                        "\t[0x%016llx -> 0x%016llx] pde @ 0x%08x: 0x%08x, 0x%08x\n",
                        pde_va, pde_va + mm->pde_stride - 1,
-                       vm->pdes.phys + pde_i * gmmu_pde__size_v(),
+                       sg_phys(vm->pdes.sgt->sgl) + pde_i * gmmu_pde__size_v(),
                        mem_rd32(pde, 0), mem_rd32(pde, 1));
 
                pte_s = vm->pdes.ptes + pde_i;
@@ -1721,9 +1834,11 @@ void gk20a_mm_dump_vm(struct vm_gk20a *vm,
                                                &pte_space_page_cur,
                                                &pte_space_offset_cur);
 
-               err = map_gmmu_pages(pte_s->ref, &pte);
-               pte_s->sgt = mem_op().pin(client, pte_s->ref);
-               pte_addr = sg_dma_address(pte_s->sgt->sgl);
+               err = map_gmmu_pages(pte_s->ref, pte_s->sgt, &pte);
+               pte_s->sgt = nvhost_memmgr_sg_table(client, pte_s->ref);
+               if (WARN_ON(IS_ERR(pte_s->sgt)))
+                       return;
+               pte_addr = sg_phys(pte_s->sgt->sgl);
 
                for (pte_i = pte_lo; pte_i <= pte_hi; pte_i++) {
 
@@ -1744,8 +1859,7 @@ void gk20a_mm_dump_vm(struct vm_gk20a *vm,
                                mem_rd32(pte + pte_space_page_offset * 8, 1));
                }
 
-               unmap_gmmu_pages(pte_s->ref, pte);
-               mem_op().unpin(client, pte_s->ref, pte_s->sgt);
+               unmap_gmmu_pages(pte_s->ref, pte_s->sgt, pte);
        }
 }
 #endif /* VM DEBUG */
drivers/video/tegra/host/gk20a/mm_gk20a.h
index f0eadd8..6100145 100644
 #ifndef __MM_GK20A_H__
 #define __MM_GK20A_H__
 
+#include <linux/scatterlist.h>
 #include "../nvhost_allocator.h"
 
+/* This "address bit" in the gmmu ptes (and other gk20a accesses)
+ * signals the address as presented should be translated by the SMMU.
+ * Without this bit present gk20a accesses are *not* translated.
+ */
+/* Hack, get this from manuals somehow... */
+#define NV_MC_SMMU_VADDR_TRANSLATION_BIT     34
+
+
 struct mem_desc {
        struct mem_handle *ref;
        struct sg_table *sgt;
@@ -54,13 +63,12 @@ struct mmu_desc {
 
 struct inst_desc {
        struct mem_desc mem;
-       phys_addr_t cpu_pa;
 };
 
 struct userd_desc {
        struct mem_desc mem;
        void *cpu_va;
-       phys_addr_t cpu_pa;
+       u64 cpu_pa;
        u64 gpu_va;
 };
 
@@ -97,7 +105,7 @@ struct gr_ctx_desc {
 
 struct compbit_store_desc {
        struct mem_desc mem;
-       phys_addr_t base_pa;
+       u64 base_pa;
        u32 alignment;
 };
 
@@ -116,7 +124,6 @@ struct page_directory_gk20a {
        /* backing for */
        u32 num_pdes;
        void *kv;
-       phys_addr_t phys;
        /* Either a *page or a *mem_handle */
        void *ref;
        bool dirty;
@@ -162,7 +169,8 @@ struct vm_gk20a {
                   struct mem_handle *r,
                   u64 offset_align,
                   u32 flags /*NVHOST_MAP_BUFFER_FLAGS_*/,
-                  u32 kind);
+                  u32 kind,
+                  struct sg_table **sgt);
 
        /* unmap handle from kernel */
        void (*unmap)(struct vm_gk20a *vm,
@@ -268,4 +276,15 @@ void gk20a_mm_dump_vm(struct vm_gk20a *vm,
 
 int gk20a_mm_suspend(struct gk20a *g);
 
+static inline u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
+{
+       u64 result = sg_phys(sgl);
+#ifdef CONFIG_TEGRA_IOMMU_SMMU
+       if (sg_dma_address(sgl))
+               result = sg_dma_address(sgl) |
+                       1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
+#endif
+       return result;
+}
+
 #endif /*_MM_GK20A_H_ */
drivers/video/tegra/host/gk20a/pmu_gk20a.c
index a9656dc..bd957c6 100644
@@ -345,11 +345,11 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu)
        gk20a_writel(g, pwr_falcon_itfen_r(),
                gk20a_readl(g, pwr_falcon_itfen_r()) |
                pwr_falcon_itfen_ctxen_enable_f());
-
        gk20a_writel(g, pwr_pmu_new_instblk_r(),
-               pwr_pmu_new_instblk_ptr_f(mm->pmu.inst_block.cpu_pa >> 12) |
+               pwr_pmu_new_instblk_ptr_f(
+                       sg_phys(mm->pmu.inst_block.mem.sgt->sgl) >> 12) |
                pwr_pmu_new_instblk_valid_f(1) |
-               pwr_pmu_new_instblk_target_fb_f());
+               pwr_pmu_new_instblk_target_sys_coh_f());
 
        /* TBD: load all other surfaces */
 
@@ -932,7 +932,8 @@ void gk20a_remove_pmu_support(struct pmu_gk20a *pmu)
 
        release_firmware(pmu->ucode_fw);
 
-       nvhost_memmgr_unpin(memmgr, inst_block->mem.ref, inst_block->mem.sgt);
+       nvhost_memmgr_free_sg_table(memmgr, inst_block->mem.ref,
+                       inst_block->mem.sgt);
        nvhost_memmgr_put(memmgr, inst_block->mem.ref);
        vm->remove_support(vm);
 
@@ -1049,7 +1050,7 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g)
 
        pmu->ucode.pmu_va = vm->map(vm, memmgr, pmu->ucode.mem.ref,
                        /*offset_align, flags, kind*/
-                       0, 0, 0);
+                       0, 0, 0, NULL);
        if (!pmu->ucode.pmu_va) {
                nvhost_err(d, "failed to map pmu ucode memory!!");
                return err;
@@ -1080,7 +1081,7 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g)
 
        pmu->pg_buf.pmu_va = vm->map(vm, memmgr, pmu->pg_buf.mem.ref,
                         /*offset_align, flags, kind*/
-                       0, 0, 0);
+                       0, 0, 0, NULL);
        if (!pmu->pg_buf.pmu_va) {
                nvhost_err(d, "failed to map fecs pg buffer");
                err = -ENOMEM;
@@ -1100,7 +1101,7 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g)
 
        pmu->seq_buf.pmu_va = vm->map(vm, memmgr, pmu->seq_buf.mem.ref,
                        /*offset_align, flags, kind*/
-                       0, 0, 0);
+                       0, 0, 0, NULL);
        if (!pmu->seq_buf.pmu_va) {
                nvhost_err(d, "failed to map zbc buffer");
                err = -ENOMEM;
@@ -1230,7 +1231,8 @@ int gk20a_init_pmu_setup_hw(struct gk20a *g)
 
        pmu->elpg_enable_allow = true;
 
-       err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa);
+       err = gr_gk20a_fecs_set_reglist_bind_inst(g,
+                       sg_phys(mm->pmu.inst_block.mem.sgt->sgl));
        if (err) {
                nvhost_err(dev_from_gk20a(g),
                        "fail to bind pmu inst to gr");
drivers/video/tegra/host/nvhost_memmgr.c
index 2294dab..8e37432 100644
@@ -355,6 +355,99 @@ u32 nvhost_memmgr_handle_to_id(struct mem_handle *handle)
        return 0;
 }
 
+struct sg_table *nvhost_memmgr_sg_table(struct mem_mgr *mgr,
+               struct mem_handle *handle)
+{
+       switch (nvhost_memmgr_type((ulong)handle)) {
+#ifdef CONFIG_TEGRA_GRHOST_USE_NVMAP
+       case mem_mgr_type_nvmap:
+               return nvmap_sg_table((struct nvmap_client *)mgr,
+                               (struct nvmap_handle_ref *)handle);
+               break;
+#endif
+#ifdef CONFIG_TEGRA_GRHOST_USE_DMABUF
+       case mem_mgr_type_dmabuf:
+               WARN_ON(1);
+               break;
+#endif
+       default:
+               break;
+       }
+
+       return NULL;
+
+}
+
+void nvhost_memmgr_free_sg_table(struct mem_mgr *mgr,
+               struct mem_handle *handle, struct sg_table *sgt)
+{
+       switch (nvhost_memmgr_type((ulong)handle)) {
+#ifdef CONFIG_TEGRA_GRHOST_USE_NVMAP
+       case mem_mgr_type_nvmap:
+               return nvmap_free_sg_table((struct nvmap_client *)mgr,
+                               (struct nvmap_handle_ref *)handle, sgt);
+               break;
+#endif
+#ifdef CONFIG_TEGRA_GRHOST_USE_DMABUF
+       case mem_mgr_type_dmabuf:
+               WARN_ON(1);
+               break;
+#endif
+       default:
+               break;
+       }
+       return;
+}
+
+int nvhost_memmgr_smmu_map(struct sg_table *sgt, size_t size,
+                          struct device *dev)
+{
+       int i;
+       struct scatterlist *sg;
+       DEFINE_DMA_ATTRS(attrs);
+       dma_addr_t addr = dma_iova_alloc(dev, size);
+
+       if (unlikely(sg_dma_address(sgt->sgl) != 0))
+               return 0;
+
+       if (dma_mapping_error(dev, addr))
+               return -ENOMEM;
+
+       dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs);
+       for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+               dma_addr_t da;
+               void *va;
+               sg_dma_address(sg) = addr;
+               sg_dma_len(sg) = sg->length;
+               for (va = phys_to_virt(sg_phys(sg));
+                    va < phys_to_virt(sg_phys(sg)) + sg->length;
+                    va += PAGE_SIZE, addr += PAGE_SIZE) {
+                       da = dma_map_single_at_attrs(dev, va, addr,
+                               PAGE_SIZE, 0, &attrs);
+                       if (dma_mapping_error(dev, da))
+                               /*  TODO: Clean up */
+                               return -EINVAL;
+               }
+       }
+       return 0;
+}
+
+void nvhost_memmgr_smmu_unmap(struct sg_table *sgt, size_t size,
+               struct device *dev)
+{
+       int i;
+       struct scatterlist *sg;
+       DEFINE_DMA_ATTRS(attrs);
+       dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs);
+       dma_set_attr(DMA_ATTR_SKIP_FREE_IOVA, &attrs);
+       dma_unmap_single_attrs(dev, sg_dma_address(sgt->sgl), size, 0, &attrs);
+       dma_iova_free(dev, sg_dma_address(sgt->sgl), size);
+       for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+               sg_dma_address(sg) = 0;
+               sg_dma_len(sg) = 0;
+       }
+}
+
 int nvhost_memmgr_init(struct nvhost_chip_support *chip)
 {
        return 0;
drivers/video/tegra/host/nvhost_memmgr.h
index dc10a96..5d099de 100644
@@ -25,6 +25,7 @@ struct nvhost_chip_support;
 struct mem_mgr;
 struct mem_handle;
 struct platform_device;
+struct device;
 
 struct nvhost_job_unpin {
        struct mem_handle *h;
@@ -66,6 +67,10 @@ void nvhost_memmgr_munmap(struct mem_handle *handle, void *addr);
 void *nvhost_memmgr_kmap(struct mem_handle *handle, unsigned int pagenum);
 void nvhost_memmgr_kunmap(struct mem_handle *handle, unsigned int pagenum,
                void *addr);
+struct sg_table *nvhost_memmgr_sg_table(struct mem_mgr *mgr,
+               struct mem_handle *handle);
+void nvhost_memmgr_free_sg_table(struct mem_mgr *mgr,
+               struct mem_handle *handle, struct sg_table *sgt);
 static inline int nvhost_memmgr_type(u32 id) { return id & MEMMGR_TYPE_MASK; }
 static inline int nvhost_memmgr_id(u32 id) { return id & MEMMGR_ID_MASK; }
 u32 nvhost_memmgr_handle_to_id(struct mem_handle *handle);
@@ -79,5 +84,9 @@ int nvhost_memmgr_pin_array_ids(struct mem_mgr *mgr,
 int nvhost_memmgr_get_param(struct mem_mgr *mem_mgr,
                            struct mem_handle *mem_handle,
                            u32 param, u32 *result);
+int nvhost_memmgr_smmu_map(struct sg_table *sgt, size_t size,
+                          struct device *dev);
+void nvhost_memmgr_smmu_unmap(struct sg_table *sgt, size_t size,
+                          struct device *dev);
 
 #endif