video: tegra: host: gk20a: Defer unmapping
Arto Merilainen [Thu, 27 Jun 2013 11:33:00 +0000 (14:33 +0300)]
This patch adds refcounting for mapped buffers. This allows unmapping
to be deferred until the channel has actually finished processing the
job.
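
The idea, in short: each submitted job takes a reference on every buffer
currently mapped by userspace, the unmap IOCTL now only drops the user
reference, and the real unmap runs from the kref release callback once the
last job holding the buffer has completed. Below is a minimal, self-contained
user-space sketch of that ownership model; the buffer/job types and the
buf_get/buf_put, job_submit/job_complete helpers are illustrative stand-ins,
not the driver's own API.

    #include <stdio.h>
    #include <stdlib.h>

    struct buffer {
            int refcount;          /* stands in for struct kref           */
            int user_mapped;       /* still mapped from userspace?        */
            unsigned long gpu_va;  /* pretend GPU virtual address         */
    };

    static void buf_get(struct buffer *b)
    {
            b->refcount++;
    }

    static void buf_put(struct buffer *b)
    {
            if (--b->refcount == 0) {
                    /* last reference gone: safe to tear down the mapping */
                    printf("unmapping va 0x%lx\n", b->gpu_va);
                    free(b);
            }
    }

    struct job {
            struct buffer **bufs;  /* buffers referenced by this job      */
            int num_bufs;
    };

    /* submit: pin every buffer the job may touch */
    static void job_submit(struct job *j)
    {
            for (int i = 0; i < j->num_bufs; i++)
                    buf_get(j->bufs[i]);
    }

    /* completion (fence expired): drop the per-job references */
    static void job_complete(struct job *j)
    {
            for (int i = 0; i < j->num_bufs; i++)
                    buf_put(j->bufs[i]);
            free(j->bufs);
            free(j);
    }

    /* userspace unmap: only drops the user reference */
    static void user_unmap(struct buffer *b)
    {
            b->user_mapped = 0;
            buf_put(b);
    }

    int main(void)
    {
            struct buffer *b = calloc(1, sizeof(*b));
            b->gpu_va = 0x100000;
            b->user_mapped = 1;
            b->refcount = 1;       /* the user mapping itself             */

            struct job *j = calloc(1, sizeof(*j));
            j->bufs = calloc(1, sizeof(*j->bufs));
            j->bufs[0] = b;
            j->num_bufs = 1;

            job_submit(j);         /* refcount: 2                         */
            user_unmap(b);         /* refcount: 1, no unmap yet           */
            job_complete(j);       /* refcount: 0, unmap happens here     */
            return 0;
    }

In the sketch the unmap only happens at job_complete(), even though
user_unmap() ran first; that is the same deferral the per-channel job list
and gk20a_channel_update() provide in this patch.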

Change-Id: Ibeb5da3e88b3aecf6b817a7952992d3ccfc69d9e
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/242877
Reviewed-by: Mitch Luban <mluban@nvidia.com>
Tested-by: Mitch Luban <mluban@nvidia.com>

drivers/video/tegra/host/gk20a/channel_gk20a.c
drivers/video/tegra/host/gk20a/channel_gk20a.h
drivers/video/tegra/host/gk20a/mm_gk20a.c
drivers/video/tegra/host/gk20a/mm_gk20a.h
drivers/video/tegra/host/nvhost_as.c
drivers/video/tegra/host/nvhost_intr.c

drivers/video/tegra/host/gk20a/channel_gk20a.c
index 00d6c6d..69f454d 100644
@@ -19,6 +19,7 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/highmem.h> /* need for nvmap.h*/
 #include <trace/events/nvhost.h>
@@ -1098,6 +1099,57 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
        }
 }
 
+static int gk20a_channel_add_job(struct channel_gk20a *c,
+                                struct nvhost_fence *fence)
+{
+       struct vm_gk20a *vm = c->vm;
+       struct channel_gk20a_job *job = NULL;
+       struct mapped_buffer_node **mapped_buffers = NULL;
+       int err = 0, num_mapped_buffers;
+
+       err = vm->get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
+       if (err)
+               return err;
+
+       job = kzalloc(sizeof(*job), GFP_KERNEL);
+       if (!job) {
+               vm->put_buffers(vm, mapped_buffers, num_mapped_buffers);
+               return -ENOMEM;
+       }
+
+       job->num_mapped_buffers = num_mapped_buffers;
+       job->mapped_buffers = mapped_buffers;
+       job->fence = *fence;
+
+       mutex_lock(&c->jobs_lock);
+       list_add_tail(&job->list, &c->jobs);
+       mutex_unlock(&c->jobs_lock);
+
+       return 0;
+}
+
+void gk20a_channel_update(struct channel_gk20a *c)
+{
+       struct gk20a *g = c->g;
+       struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
+       struct vm_gk20a *vm = c->vm;
+       struct channel_gk20a_job *job, *n;
+
+       mutex_lock(&c->jobs_lock);
+       list_for_each_entry_safe(job, n, &c->jobs, list) {
+               bool completed = nvhost_syncpt_is_expired(sp,
+                       job->fence.syncpt_id, job->fence.value);
+               if (!completed)
+                       break;
+
+               vm->put_buffers(vm, job->mapped_buffers,
+                               job->num_mapped_buffers);
+               list_del_init(&job->list);
+               kfree(job);
+       }
+       mutex_unlock(&c->jobs_lock);
+}
+
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                struct nvhost_gpfifo *gpfifo,
                                u32 num_entries,
@@ -1296,6 +1348,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                           incr_id, fence->value);
 
 
+       /* TODO! Check for errors... */
+       gk20a_channel_add_job(c, fence);
+
        c->cmds_pending = true;
        gk20a_bar1_writel(g,
                c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
@@ -1327,6 +1382,8 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
        c->hw_chid = chid;
        c->bound = false;
        c->remove_support = gk20a_remove_channel_support;
+       mutex_init(&c->jobs_lock);
+       INIT_LIST_HEAD(&c->jobs);
 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
        mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
 #endif
drivers/video/tegra/host/gk20a/channel_gk20a.h
index 5b63c96..fd28e9b 100644
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/mutex.h>
+#include <linux/nvhost_ioctl.h>
 struct gk20a;
 struct gr_gk20a;
 struct mem_mgr;
@@ -66,6 +67,13 @@ struct channel_ctx_gk20a {
        bool    global_ctx_buffer_mapped;
 };
 
+struct channel_gk20a_job {
+       struct mapped_buffer_node **mapped_buffers;
+       int num_mapped_buffers;
+       struct nvhost_fence fence;
+       struct list_head list;
+};
+
 /* this is the priv element of struct nvhost_channel */
 struct channel_gk20a {
        struct gk20a *g;
@@ -79,6 +87,9 @@ struct channel_gk20a {
        struct nvhost_channel *ch;
        struct nvhost_hwctx *hwctx;
 
+       struct list_head jobs;
+       struct mutex jobs_lock;
+
        struct vm_gk20a *vm;
 
        struct gpfifo_desc gpfifo;
@@ -136,6 +147,7 @@ struct nvhost_gpfifo;
 struct nvhost_zbc_set_table_args;
 struct nvhost_cycle_stats_args;
 
+void gk20a_channel_update(struct channel_gk20a *c);
 int gk20a_init_channel_support(struct gk20a *, u32 chid);
 int gk20a_channel_init(struct nvhost_channel *ch, struct nvhost_master *host,
                       int index);
drivers/video/tegra/host/gk20a/mm_gk20a.c
index 746c7ac..4ee70ea 100644
@@ -56,7 +56,10 @@ enum gmmu_page_smmu_type {
        gmmu_page_smmu_type_virtual,
 };
 
-
+static void gk20a_vm_unmap_locked(struct vm_gk20a *vm, u64 offset,
+                       struct mem_mgr **memmgr, struct mem_handle **r);
+static struct mapped_buffer_node *find_mapped_buffer(struct rb_root *root,
+                                                    u64 addr);
 static void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm);
 static int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
                struct mem_mgr **memmgr, struct mem_handle **r, u64 *offset);
@@ -494,6 +497,94 @@ static int validate_gmmu_page_table_gk20a(struct vm_gk20a *vm,
        return 0;
 }
 
+static int gk20a_vm_get_buffers(struct vm_gk20a *vm,
+                               struct mapped_buffer_node ***mapped_buffers,
+                               int *num_buffers)
+{
+       struct mapped_buffer_node *mapped_buffer;
+       struct mapped_buffer_node **buffer_list;
+       struct rb_node *node;
+       int i = 0;
+
+       mutex_lock(&vm->mapped_buffers_lock);
+
+       buffer_list = kzalloc(sizeof(*buffer_list) *
+                             vm->num_user_mapped_buffers, GFP_KERNEL);
+       if (!buffer_list) {
+               mutex_unlock(&vm->mapped_buffers_lock);
+               return -ENOMEM;
+       }
+
+       node = rb_first(&vm->mapped_buffers);
+       while (node) {
+               mapped_buffer =
+                       container_of(node, struct mapped_buffer_node, node);
+               if (mapped_buffer->user_mapped) {
+                       buffer_list[i] = mapped_buffer;
+                       kref_get(&mapped_buffer->ref);
+                       i++;
+               }
+               node = rb_next(&mapped_buffer->node);
+       }
+
+       BUG_ON(i != vm->num_user_mapped_buffers);
+
+       *num_buffers = vm->num_user_mapped_buffers;
+       *mapped_buffers = buffer_list;
+
+       mutex_unlock(&vm->mapped_buffers_lock);
+
+       return 0;
+}
+
+static void gk20a_vm_unmap_buffer(struct kref *ref)
+{
+       struct mapped_buffer_node *mapped_buffer =
+               container_of(ref, struct mapped_buffer_node, ref);
+       struct vm_gk20a *vm = mapped_buffer->vm;
+       struct mem_mgr *memmgr = NULL;
+       struct mem_handle *r = NULL;
+
+       gk20a_vm_unmap_locked(vm, mapped_buffer->addr, &memmgr, &r);
+       nvhost_memmgr_put(memmgr, r);
+       nvhost_memmgr_put_mgr(memmgr);
+}
+
+static void gk20a_vm_put_buffers(struct vm_gk20a *vm,
+                                struct mapped_buffer_node **mapped_buffers,
+                                int num_buffers)
+{
+       int i;
+
+       mutex_lock(&vm->mapped_buffers_lock);
+       for (i = 0; i < num_buffers; ++i)
+               kref_put(&mapped_buffers[i]->ref,
+                        gk20a_vm_unmap_buffer);
+       mutex_unlock(&vm->mapped_buffers_lock);
+
+       kfree(mapped_buffers);
+}
+
+static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
+{
+       struct mapped_buffer_node *mapped_buffer;
+
+       mutex_lock(&vm->mapped_buffers_lock);
+
+       mapped_buffer = find_mapped_buffer(&vm->mapped_buffers, offset);
+       if (!mapped_buffer) {
+               mutex_unlock(&vm->mapped_buffers_lock);
+               nvhost_dbg(dbg_err, "invalid addr to unmap 0x%llx", offset);
+               return;
+       }
+
+       mapped_buffer->user_mapped = false;
+       vm->num_user_mapped_buffers--;
+       kref_put(&mapped_buffer->ref, gk20a_vm_unmap_buffer);
+
+       mutex_unlock(&vm->mapped_buffers_lock);
+}
+
 static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
                             u64 size,
                             enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
@@ -956,15 +1047,20 @@ static u64 gk20a_vm_map(struct vm_gk20a *vm,
        mapped_buffer->pgsz_idx    = bfr.pgsz_idx;
        mapped_buffer->ctag_offset = bfr.ctag_offset;
        mapped_buffer->ctag_lines  = bfr.ctag_lines;
+       mapped_buffer->vm          = vm;
+       mapped_buffer->user_mapped = true;
+       kref_init(&mapped_buffer->ref);
 
        mutex_lock(&vm->mapped_buffers_lock);
        err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
-       mutex_unlock(&vm->mapped_buffers_lock);
        if (err) {
+               mutex_unlock(&vm->mapped_buffers_lock);
                nvhost_err(d, "failed to insert into mapped buffer tree");
                goto clean_up;
        }
        inserted = true;
+       vm->num_user_mapped_buffers++;
+       mutex_unlock(&vm->mapped_buffers_lock);
 
        nvhost_dbg_info("allocated va @ 0x%llx", map_offset);
 
@@ -985,6 +1081,7 @@ clean_up:
        if (inserted) {
                mutex_lock(&vm->mapped_buffers_lock);
                rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
+               vm->num_user_mapped_buffers--;
                mutex_unlock(&vm->mapped_buffers_lock);
        }
        kfree(mapped_buffer);
@@ -1291,6 +1388,10 @@ static void gk20a_vm_unmap_locked(struct vm_gk20a *vm, u64 offset,
        /* remove from mapped buffer tree, free */
        rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
 
+       /* keep track of mapped buffers */
+       if (mapped_buffer->user_mapped)
+               vm->num_user_mapped_buffers--;
+
        *memmgr = mapped_buffer->memmgr;
        *r = mapped_buffer->handle_ref;
        kfree(mapped_buffer);
@@ -1298,21 +1399,15 @@ static void gk20a_vm_unmap_locked(struct vm_gk20a *vm, u64 offset,
        return;
 }
 
-static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
-                       struct mem_mgr **memmgr, struct mem_handle **r)
-{
-       mutex_lock(&vm->mapped_buffers_lock);
-       gk20a_vm_unmap_locked(vm, offset, memmgr, r);
-       mutex_unlock(&vm->mapped_buffers_lock);
-}
-
 /* called by kernel. mem_mgr and mem_handle are ignored */
 static void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
 {
        struct mem_mgr *memmgr;
        struct mem_handle *r;
 
-       gk20a_vm_unmap_user(vm, offset, &memmgr, &r);
+       mutex_lock(&vm->mapped_buffers_lock);
+       gk20a_vm_unmap_locked(vm, offset, &memmgr, &r);
+       mutex_unlock(&vm->mapped_buffers_lock);
 }
 
 void gk20a_vm_remove_support(struct vm_gk20a *vm)
@@ -1452,6 +1547,8 @@ static int gk20a_as_alloc_share(struct nvhost_as_share *as_share)
        vm->map            = gk20a_vm_map;
        vm->unmap          = gk20a_vm_unmap;
        vm->unmap_user     = gk20a_vm_unmap_user;
+       vm->put_buffers    = gk20a_vm_put_buffers;
+       vm->get_buffers    = gk20a_vm_get_buffers;
        vm->tlb_inval      = gk20a_mm_tlb_invalidate;
        vm->find_buffer    = gk20a_vm_find_buffer;
        vm->remove_support = gk20a_vm_remove_support;
@@ -1594,44 +1691,17 @@ static int gk20a_as_unmap_buffer(struct nvhost_as_share *as_share, u64 offset,
                                 struct mem_mgr **memmgr, struct mem_handle **r)
 {
        struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
-       int err = 0;
-       struct nvhost_hwctx *hwctx;
-       struct channel_gk20a *ch;
-       struct list_head *pos;
-       unsigned long timeout = CONFIG_TEGRA_GRHOST_DEFAULT_TIMEOUT;
 
        nvhost_dbg_fn("");
 
-       if (!tegra_platform_is_silicon())
-               timeout = MAX_SCHEDULE_TIMEOUT;
-
-       /* User mode clients expect to be able to cleanly free a buffers after
-        * launching work against them.  To avoid causing mmu faults we wait
-        * for all pending work with respect to the share to clear before
-        * unmapping the pages...
-        * Note: the finish call below takes care to wait only if necessary.
-        * So only the first in a series of unmappings will cause a wait for
-        * idle.
-        */
-       /* TODO: grab bound list lock, release during wait */
-       /* TODO: even better: schedule deferred (finish,unmap) and return
-        * immediately */
-       list_for_each(pos, &as_share->bound_list) {
-               hwctx = container_of(pos, struct nvhost_hwctx,
-                                    as_share_bound_list_node);
-               if (likely(!hwctx->has_timedout)) {
-                       ch =  (struct channel_gk20a *)hwctx->priv;
-                       BUG_ON(!ch);
-                       err = gk20a_channel_finish(ch, timeout);
-                       if (err)
-                               break;
-               }
-       }
-
-       if (!err)
-               vm->unmap_user(vm, offset, memmgr, r);
+       vm->unmap_user(vm, offset);
 
-       return err;
+       /* these are not available */
+       if (memmgr)
+               *memmgr = NULL;
+       if (r)
+               *r = NULL;
+       return 0;
 }
 
 
@@ -1783,6 +1853,8 @@ int gk20a_init_bar1_vm(struct mm_gk20a *mm)
        vm->map            = gk20a_vm_map;
        vm->unmap          = gk20a_vm_unmap;
        vm->unmap_user     = gk20a_vm_unmap_user;
+       vm->put_buffers    = gk20a_vm_put_buffers;
+       vm->get_buffers    = gk20a_vm_get_buffers;
        vm->tlb_inval      = gk20a_mm_tlb_invalidate;
        vm->remove_support = gk20a_vm_remove_support;
 
@@ -1929,6 +2001,8 @@ int gk20a_init_pmu_vm(struct mm_gk20a *mm)
        vm->map            = gk20a_vm_map;
        vm->unmap          = gk20a_vm_unmap;
        vm->unmap_user     = gk20a_vm_unmap_user;
+       vm->put_buffers    = gk20a_vm_put_buffers;
+       vm->get_buffers    = gk20a_vm_get_buffers;
        vm->tlb_inval      = gk20a_mm_tlb_invalidate;
        vm->remove_support = gk20a_vm_remove_support;
 
drivers/video/tegra/host/gk20a/mm_gk20a.h
index ff8b696..3fc9778 100644
@@ -137,12 +137,15 @@ struct page_directory_gk20a {
 };
 
 struct mapped_buffer_node {
+       struct vm_gk20a *vm;
        struct rb_node node;
        u64 addr;
        u64 size;
        struct mem_mgr *memmgr;
        struct mem_handle *handle_ref;
        struct sg_table *sgt;
+       struct kref ref;
+       bool user_mapped;
        u32 pgsz_idx;
        u32 ctag_offset;
        u32 ctag_lines;
@@ -155,9 +158,12 @@ struct vm_gk20a {
        u64 va_start;
        u64 va_limit;
 
+       int num_user_mapped_buffers;
+
        bool big_pages;   /* enable large page support */
        bool enable_ctag;
        bool tlb_dirty;
+       bool mapped;
 
        struct page_directory_gk20a pdes;
 
@@ -184,10 +190,17 @@ struct vm_gk20a {
                u64 offset);
 
        /* unmap handle from user */
-       void (*unmap_user)(struct vm_gk20a *vm,
-               u64 offset,
-               struct mem_mgr **memmgr,
-               struct mem_handle **r);
+       void (*unmap_user)(struct vm_gk20a *vm, u64 offset);
+
+       /* get reference to all currently mapped buffers */
+       int (*get_buffers)(struct vm_gk20a *vm,
+                struct mapped_buffer_node ***mapped_buffers,
+                int *num_buffers);
+
+       /* put references on the given buffers */
+       void (*put_buffers)(struct vm_gk20a *vm,
+                struct mapped_buffer_node **mapped_buffers,
+                int num_buffers);
 
        /* invalidate tlbs for the vm area */
        void (*tlb_inval)(struct vm_gk20a *vm);
drivers/video/tegra/host/nvhost_as.c
index 3b0e2ed..55dd96b 100644
@@ -420,19 +420,8 @@ int nvhost_as_ioctl_map_buffer(struct nvhost_as_share *as_share,
 int nvhost_as_ioctl_unmap_buffer(struct nvhost_as_share *as_share,
                                 struct nvhost_as_unmap_buffer_args *args)
 {
-       struct mem_mgr *memmgr = NULL;
-       struct mem_handle *r = NULL;
-       int err;
-
        nvhost_dbg_fn("");
 
-       err = as_share->as->ops->unmap_buffer(as_share,
-                       args->offset, &memmgr, &r);
-       if (err)
-               return err;
-
-       nvhost_memmgr_put(memmgr, r);
-       nvhost_memmgr_put_mgr(memmgr);
-
-       return err;
+       return as_share->as->ops->unmap_buffer(as_share,
+                       args->offset, NULL, NULL);
 }
drivers/video/tegra/host/nvhost_intr.c
index 2c32d13..619310e 100644
@@ -159,6 +159,7 @@ static void action_gpfifo_submit_complete(struct nvhost_waitlist *waiter)
        struct channel_gk20a *ch20a = waiter->data;
        int nr_completed = waiter->count;
        wake_up(&ch20a->submit_wq);
+       gk20a_channel_update(ch20a);
        nvhost_module_idle_mult(ch20a->ch->dev, nr_completed);
        /* TODO: add trace function */
 }