gpu: nvgpu: add open channel ioctl to ctrl node
[linux-3.10.git] drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index a9aec43..d61656f 100644
@@ -1,9 +1,7 @@
 /*
- * drivers/video/tegra/host/gk20a/channel_gk20a.c
- *
  * GK20A Graphics channel
  *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -14,9 +12,8 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/nvhost.h>
@@ -33,6 +30,7 @@
 
 #include "gk20a.h"
 #include "dbg_gpu_gk20a.h"
+#include "fence_gk20a.h"
 #include "semaphore_gk20a.h"
 
 #include "hw_ram_gk20a.h"
@@ -55,16 +53,8 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
 
 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
-static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
-                       u64 gpfifo_base, u32 gpfifo_entries);
 
 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
-static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
-
-static int channel_gk20a_alloc_inst(struct gk20a *g,
-                               struct channel_gk20a *ch);
-static void channel_gk20a_free_inst(struct gk20a *g,
-                               struct channel_gk20a *ch);
 
 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
                                        bool add);
@@ -97,37 +87,13 @@ static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
 
 int channel_gk20a_commit_va(struct channel_gk20a *c)
 {
-       u64 addr;
-       u32 addr_lo;
-       u32 addr_hi;
-       void *inst_ptr;
-
        gk20a_dbg_fn("");
 
-       inst_ptr = c->inst_block.cpuva;
-       if (!inst_ptr)
+       if (!c->inst_block.cpuva)
                return -ENOMEM;
 
-       addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
-       addr_lo = u64_lo32(addr >> 12);
-       addr_hi = u64_hi32(addr);
-
-       gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
-                  (u64)addr, addr_lo, addr_hi);
-
-       gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
-               ram_in_page_dir_base_target_vid_mem_f() |
-               ram_in_page_dir_base_vol_true_f() |
-               ram_in_page_dir_base_lo_f(addr_lo));
-
-       gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
-               ram_in_page_dir_base_hi_f(addr_hi));
-
-       gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
-                u64_lo32(c->vm->va_limit) | 0xFFF);
-
-       gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
-               ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
+       gk20a_init_inst_block(&c->inst_block, c->vm,
+                       c->vm->gmmu_page_sizes[gmmu_page_size_big]);
 
        return 0;
 }
@@ -173,12 +139,10 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
                return -ENOMEM;
 
        /* disable channel */
-       gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
-               gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
-               ccsr_channel_enable_clr_true_f());
+       c->g->ops.fifo.disable_channel(c);
 
        /* preempt the channel */
-       WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
+       WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
 
        /* value field is 8 bits long */
        while (value >= 1 << 8) {
@@ -194,9 +158,9 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
        }
 
        /* set new timeslice */
-       gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
+       gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
                value | (shift << 12) |
-               fifo_eng_timeslice_enable_true_f());
+               fifo_runlist_timeslice_enable_true_f());
 
        /* enable channel */
        gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
@@ -206,8 +170,8 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
        return 0;
 }
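
The timeslice programmed above is a floating-point-style encoding: an 8-bit mantissa (value) with an exponent (shift) packed in at bit 12. A standalone model of the normalization, assuming the loop body elided by the hunk halves value while bumping shift (which is what the 8-bit bound implies):

    #include <stdio.h>

    /* Pack a raw timeslice into the value/shift pair written to
     * ram_fc_runlist_timeslice above: normalize until the mantissa
     * fits the 8-bit value field, put the exponent at bit 12. */
    static unsigned int encode_timeslice(unsigned int value)
    {
            unsigned int shift = 0;

            while (value >= 1 << 8) {       /* value field is 8 bits long */
                    value >>= 1;
                    shift++;
            }
            return value | (shift << 12);
    }

    int main(void)
    {
            /* 1000 normalizes to 250 << 2; prints 0x20fa */
            printf("0x%x\n", encode_timeslice(1000));
            return 0;
    }
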
 
-static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
-                               u64 gpfifo_base, u32 gpfifo_entries)
+int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+                       u64 gpfifo_base, u32 gpfifo_entries)
 {
        void *inst_ptr;
 
@@ -257,10 +221,10 @@ static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
                pbdma_acquire_timeout_man_max_f() |
                pbdma_acquire_timeout_en_disable_f());
 
-       gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
-               fifo_eng_timeslice_timeout_128_f() |
-               fifo_eng_timeslice_timescale_3_f() |
-               fifo_eng_timeslice_enable_true_f());
+       gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
+               fifo_runlist_timeslice_timeout_128_f() |
+               fifo_runlist_timeslice_timescale_3_f() |
+               fifo_runlist_timeslice_enable_true_f());
 
        gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
                fifo_pb_timeslice_timeout_16_f() |
@@ -269,7 +233,7 @@ static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 
        gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
-       return 0;
+       return channel_gk20a_commit_userd(c);
 }
 
 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
@@ -323,7 +287,7 @@ static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
                 ccsr_channel_enable_set_true_f());
 }
 
-static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
+void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
 {
        struct gk20a *g = ch_gk20a->g;
 
@@ -347,76 +311,78 @@ static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
        }
 }
 
-static int channel_gk20a_alloc_inst(struct gk20a *g,
-                               struct channel_gk20a *ch)
+int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
-       struct device *d = dev_from_gk20a(g);
-       int err = 0;
-       dma_addr_t iova;
+       int err;
 
        gk20a_dbg_fn("");
 
-       ch->inst_block.size = ram_in_alloc_size_v();
-       ch->inst_block.cpuva = dma_alloc_coherent(d,
-                                       ch->inst_block.size,
-                                       &iova,
-                                       GFP_KERNEL);
-       if (!ch->inst_block.cpuva) {
-               gk20a_err(d, "%s: memory allocation failed\n", __func__);
-               err = -ENOMEM;
-               goto clean_up;
-       }
-
-       ch->inst_block.iova = iova;
-       ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
-                                                       ch->inst_block.iova);
-       if (!ch->inst_block.cpu_pa) {
-               gk20a_err(d, "%s: failed to get physical address\n", __func__);
-               err = -ENOMEM;
-               goto clean_up;
-       }
+       err = gk20a_alloc_inst_block(g, &ch->inst_block);
+       if (err)
+               return err;
 
        gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
                ch->hw_chid, (u64)ch->inst_block.cpu_pa);
 
        gk20a_dbg_fn("done");
        return 0;
+}
 
-clean_up:
-       gk20a_err(d, "fail");
-       channel_gk20a_free_inst(g, ch);
-       return err;
+void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
+{
+       gk20a_free_inst_block(g, &ch->inst_block);
 }
 
-static void channel_gk20a_free_inst(struct gk20a *g,
-                               struct channel_gk20a *ch)
+static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
 {
-       struct device *d = dev_from_gk20a(g);
+       return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
+}
 
-       if (ch->inst_block.cpuva)
-               dma_free_coherent(d, ch->inst_block.size,
-                               ch->inst_block.cpuva, ch->inst_block.iova);
-       ch->inst_block.cpuva = NULL;
-       ch->inst_block.iova = 0;
-       memset(&ch->inst_block, 0, sizeof(struct inst_desc));
+void channel_gk20a_enable(struct channel_gk20a *ch)
+{
+       /* enable channel */
+       gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+               gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
+               ccsr_channel_enable_set_true_f());
 }
 
-static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
+void channel_gk20a_disable(struct channel_gk20a *ch)
 {
-       return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
+       /* disable channel */
+       gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+               gk20a_readl(ch->g,
+                       ccsr_channel_r(ch->hw_chid)) |
+                       ccsr_channel_enable_clr_true_f());
 }
 
-void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
+void gk20a_channel_abort(struct channel_gk20a *ch)
 {
+       struct channel_gk20a_job *job, *n;
+       bool released_job_semaphore = false;
+
        /* ensure no fences are pending */
+       mutex_lock(&ch->submit_lock);
        if (ch->sync)
                ch->sync->set_min_eq_max(ch->sync);
+       mutex_unlock(&ch->submit_lock);
+
+       /* release all job semaphores (applies only to jobs that use
+          semaphore synchronization) */
+       mutex_lock(&ch->jobs_lock);
+       list_for_each_entry_safe(job, n, &ch->jobs, list) {
+               if (job->post_fence->semaphore) {
+                       gk20a_semaphore_release(job->post_fence->semaphore);
+                       released_job_semaphore = true;
+               }
+       }
+       mutex_unlock(&ch->jobs_lock);
 
-       /* disable channel */
-       gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
-                    gk20a_readl(ch->g,
-                    ccsr_channel_r(ch->hw_chid)) |
-                    ccsr_channel_enable_clr_true_f());
+       ch->g->ops.fifo.disable_channel(ch);
+
+       if (released_job_semaphore) {
+               wake_up_interruptible_all(&ch->semaphore_wq);
+               gk20a_channel_update(ch, 0);
+       }
 }
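
gk20a_channel_abort() forces completion from the CPU side: set_min_eq_max() makes every outstanding syncpoint fence on the channel read as expired, and semaphore-backed job fences are released by hand so waiters on semaphore_wq can run. A rough model of the syncpoint half of that trick; the (min, max) counter-pair reading is the usual nvhost convention, not something this diff defines:

    /* Model: a syncpoint is a (min, max) pair where min tracks what the
     * hardware has completed and max the highest threshold queued.
     * Forcing min up to max makes every pending fence expire at once. */
    struct syncpt { unsigned int min, max; };

    static void set_min_eq_max(struct syncpt *sp)
    {
            sp->min = sp->max;
    }

    static int fence_expired(const struct syncpt *sp, unsigned int thresh)
    {
            return (int)(sp->min - thresh) >= 0;    /* wrap-safe compare */
    }
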
 
 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
@@ -455,12 +421,12 @@ void gk20a_disable_channel(struct channel_gk20a *ch,
        }
 
        /* disable the channel from hw and increment syncpoints */
-       gk20a_disable_channel_no_update(ch);
+       gk20a_channel_abort(ch);
 
        gk20a_wait_channel_idle(ch);
 
        /* preempt the channel */
-       gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
+       ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
 
        /* remove channel from runlist */
        channel_gk20a_update_runlist(ch, false);
@@ -484,15 +450,15 @@ static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
 }
 
 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
-                      struct nvhost_cycle_stats_args *args)
+                      struct nvgpu_cycle_stats_args *args)
 {
        struct dma_buf *dmabuf;
        void *virtual_address;
 
-       if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
+       if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
 
                /* set up new cyclestats buffer */
-               dmabuf = dma_buf_get(args->nvmap_handle);
+               dmabuf = dma_buf_get(args->dmabuf_fd);
                if (IS_ERR(dmabuf))
                        return PTR_ERR(dmabuf);
                virtual_address = dma_buf_vmap(dmabuf);
@@ -504,12 +470,12 @@ static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
                ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
                return 0;
 
-       } else if (!args->nvmap_handle &&
+       } else if (!args->dmabuf_fd &&
                        ch->cyclestate.cyclestate_buffer_handler) {
                gk20a_free_cycle_stats_buffer(ch);
                return 0;
 
-       } else if (!args->nvmap_handle &&
+       } else if (!args->dmabuf_fd &&
                        !ch->cyclestate.cyclestate_buffer_handler) {
                /* no request from GL */
                return 0;
@@ -522,7 +488,7 @@ static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
 #endif
 
 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
-               struct nvhost_set_error_notifier *args) {
+               struct nvgpu_set_error_notifier *args) {
        void *va;
 
        struct dma_buf *dmabuf;
@@ -553,7 +519,7 @@ static int gk20a_init_error_notifier(struct channel_gk20a *ch,
        ch->error_notifier_ref = dmabuf;
        ch->error_notifier = va + args->offset;
        ch->error_notifier_va = va;
-       memset(ch->error_notifier, 0, sizeof(struct nvhost_notification));
+       memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
        return 0;
 }
 
@@ -572,7 +538,7 @@ void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
                ch->error_notifier->info32 = error;
                ch->error_notifier->status = 0xffff;
                gk20a_err(dev_from_gk20a(ch->g),
-                               "error notifier set to %d\n", error);
+                   "error notifier set to %d for ch %d\n", error, ch->hw_chid);
        }
 }
 
@@ -581,9 +547,9 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
        if (ch->error_notifier_ref) {
                dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
                dma_buf_put(ch->error_notifier_ref);
-               ch->error_notifier_ref = 0;
-               ch->error_notifier = 0;
-               ch->error_notifier_va = 0;
+               ch->error_notifier_ref = NULL;
+               ch->error_notifier = NULL;
+               ch->error_notifier_va = NULL;
        }
 }
 
@@ -624,7 +590,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
        gk20a_free_error_notifiers(ch);
 
        /* release channel ctx */
-       gk20a_free_channel_ctx(ch);
+       g->ops.gr.free_channel_ctx(ch);
 
        gk20a_gr_flush_channel_tlb(gr);
 
@@ -648,22 +614,40 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
 
        channel_gk20a_free_priv_cmdbuf(ch);
 
+       /* sync must be destroyed before releasing channel vm */
+       if (ch->sync) {
+               ch->sync->destroy(ch->sync);
+               ch->sync = NULL;
+       }
+
        /* release channel binding to the as_share */
-       gk20a_as_release_share(ch_vm->as_share);
+       if (ch_vm->as_share)
+               gk20a_as_release_share(ch_vm->as_share);
+       else
+               gk20a_vm_put(ch_vm);
+
+       spin_lock(&ch->update_fn_lock);
+       ch->update_fn = NULL;
+       ch->update_fn_data = NULL;
+       spin_unlock(&ch->update_fn_lock);
+       cancel_work_sync(&ch->update_fn_work);
 
 unbind:
-       channel_gk20a_unbind(ch);
-       channel_gk20a_free_inst(g, ch);
+       if (gk20a_is_channel_marked_as_tsg(ch))
+               gk20a_tsg_unbind_channel(ch);
+
+       g->ops.fifo.unbind_channel(ch);
+       g->ops.fifo.free_inst(g, ch);
 
        ch->vpr = false;
        ch->vm = NULL;
 
-       gk20a_channel_fence_close(&ch->last_submit.pre_fence);
-       gk20a_channel_fence_close(&ch->last_submit.post_fence);
-       if (ch->sync) {
-               ch->sync->destroy(ch->sync);
-               ch->sync = NULL;
-       }
+       mutex_lock(&ch->submit_lock);
+       gk20a_fence_put(ch->last_submit.pre_fence);
+       gk20a_fence_put(ch->last_submit.post_fence);
+       ch->last_submit.pre_fence = NULL;
+       ch->last_submit.post_fence = NULL;
+       mutex_unlock(&ch->submit_lock);
        WARN_ON(ch->sync);
 
        /* unlink all debug sessions */
@@ -683,9 +667,12 @@ unbind:
 int gk20a_channel_release(struct inode *inode, struct file *filp)
 {
        struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
-       struct gk20a *g = ch->g;
+       struct gk20a *g = ch ? ch->g : NULL;
        int err;
 
+       if (!ch)
+               return 0;
+
        trace_gk20a_channel_release(dev_name(&g->dev->dev));
 
        err = gk20a_busy(ch->g->dev);
@@ -697,12 +684,43 @@ int gk20a_channel_release(struct inode *inode, struct file *filp)
        gk20a_free_channel(ch, true);
        gk20a_idle(ch->g->dev);
 
-       gk20a_put_client(g);
        filp->private_data = NULL;
        return 0;
 }
 
-static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
+static void gk20a_channel_update_runcb_fn(struct work_struct *work)
+{
+       struct channel_gk20a *ch =
+               container_of(work, struct channel_gk20a, update_fn_work);
+       void (*update_fn)(struct channel_gk20a *, void *);
+       void *update_fn_data;
+
+       spin_lock(&ch->update_fn_lock);
+       update_fn = ch->update_fn;
+       update_fn_data = ch->update_fn_data;
+       spin_unlock(&ch->update_fn_lock);
+
+       if (update_fn)
+               update_fn(ch, update_fn_data);
+}
+
+struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
+               void (*update_fn)(struct channel_gk20a *, void *),
+               void *update_fn_data)
+{
+       struct channel_gk20a *ch = gk20a_open_new_channel(g);
+
+       if (ch) {
+               spin_lock(&ch->update_fn_lock);
+               ch->update_fn = update_fn;
+               ch->update_fn_data = update_fn_data;
+               spin_unlock(&ch->update_fn_lock);
+       }
+
+       return ch;
+}
+
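
gk20a_open_new_channel_with_cb() gives in-kernel users a completion hook: update_fn is sampled under update_fn_lock and run from the update_fn_work worker that gk20a_channel_update() schedules, never from interrupt context. A hypothetical registration, assuming the prototype is exported from channel_gk20a.h (my_ctx and both functions are illustrative only):

    #include <linux/completion.h>
    #include <linux/errno.h>

    #include "channel_gk20a.h"  /* assumed to declare ..._with_cb() */

    struct my_ctx {                             /* illustrative only */
            struct channel_gk20a *ch;
            struct completion job_done;
    };

    static void my_update_cb(struct channel_gk20a *ch, void *data)
    {
            struct my_ctx *ctx = data;

            /* runs from update_fn_work, so sleeping would also be legal */
            complete(&ctx->job_done);
    }

    static int my_open_channel(struct gk20a *g, struct my_ctx *ctx)
    {
            init_completion(&ctx->job_done);
            ctx->ch = gk20a_open_new_channel_with_cb(g, my_update_cb, ctx);
            return ctx->ch ? 0 : -ENOMEM;
    }
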
+struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 {
        struct fifo_gk20a *f = &g->fifo;
        struct channel_gk20a *ch;
@@ -711,21 +729,23 @@ static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
        if (ch == NULL) {
                /* TBD: we want to make this virtualizable */
                gk20a_err(dev_from_gk20a(g), "out of hw chids");
-               return 0;
+               return NULL;
        }
 
        ch->g = g;
 
-       if (channel_gk20a_alloc_inst(g, ch)) {
+       if (g->ops.fifo.alloc_inst(g, ch)) {
                ch->in_use = false;
                gk20a_err(dev_from_gk20a(g),
                           "failed to open gk20a channel, out of inst mem");
 
-               return 0;
+               return NULL;
        }
-       g->ops.fifo.bind_channel(ch);
        ch->pid = current->pid;
 
+       /* By default, channel is regular (non-TSG) channel */
+       ch->tsgid = NVGPU_INVALID_TSG_ID;
+
        /* reset timeout counter and update timestamp */
        ch->timeout_accumulated_ms = 0;
        ch->timeout_gpfifo_get = 0;
@@ -733,6 +753,7 @@ static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
        ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
        ch->timeout_debug_dump = true;
        ch->has_timedout = false;
+       ch->obj_class = 0;
 
        /* The channel is *not* runnable at this point. It still needs to have
         * an address space bound and allocate a gpfifo and grctx. */
@@ -741,6 +762,15 @@ static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
        init_waitqueue_head(&ch->semaphore_wq);
        init_waitqueue_head(&ch->submit_wq);
 
+       mutex_init(&ch->poll_events.lock);
+       ch->poll_events.events_enabled = false;
+       ch->poll_events.num_pending_events = 0;
+
+       ch->update_fn = NULL;
+       ch->update_fn_data = NULL;
+       spin_lock_init(&ch->update_fn_lock);
+       INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
+
        return ch;
 }
 
@@ -751,23 +781,14 @@ static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
 
        trace_gk20a_channel_open(dev_name(&g->dev->dev));
 
-       err = gk20a_get_client(g);
-       if (err) {
-               gk20a_err(dev_from_gk20a(g),
-                       "failed to get client ref");
-               return err;
-       }
-
        err = gk20a_busy(g->dev);
        if (err) {
-               gk20a_put_client(g);
                gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
                return err;
        }
        ch = gk20a_open_new_channel(g);
        gk20a_idle(g->dev);
        if (!ch) {
-               gk20a_put_client(g);
                gk20a_err(dev_from_gk20a(g),
                        "failed to get f");
                return -ENOMEM;
@@ -781,7 +802,55 @@ int gk20a_channel_open(struct inode *inode, struct file *filp)
 {
        struct gk20a *g = container_of(inode->i_cdev,
                        struct gk20a, channel.cdev);
-       return __gk20a_channel_open(g, filp);
+       int ret;
+
+       gk20a_dbg_fn("start");
+       ret = __gk20a_channel_open(g, filp);
+
+       gk20a_dbg_fn("end");
+       return ret;
+}
+
+int gk20a_channel_open_ioctl(struct gk20a *g,
+               struct nvgpu_channel_open_args *args)
+{
+       int err;
+       int fd;
+       struct file *file;
+       char *name;
+
+       err = get_unused_fd_flags(O_RDWR);
+       if (err < 0)
+               return err;
+       fd = err;
+
+       name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
+                       dev_name(&g->dev->dev), fd);
+       if (!name) {
+               err = -ENOMEM;
+               goto clean_up;
+       }
+
+       file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
+       kfree(name);
+       if (IS_ERR(file)) {
+               err = PTR_ERR(file);
+               goto clean_up;
+       }
+       fd_install(fd, file);
+
+       err = __gk20a_channel_open(g, file);
+       if (err)
+               goto clean_up_file;
+
+       args->channel_fd = fd;
+       return 0;
+
+clean_up_file:
+       fput(file);
+clean_up:
+       put_unused_fd(fd);
+       return err;
 }
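
gk20a_channel_open_ioctl() is the piece that lets the ctrl node hand out channel fds; the ctrl-node dispatch itself lives outside this file. A hypothetical user-space caller; the struct layout beyond channel_fd, the ioctl number, and the device path are assumptions, not taken from this diff:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Only channel_fd is visible in this diff; everything below
     * the include lines is an assumed uapi sketch. */
    struct nvgpu_channel_open_args {
            int channel_fd;     /* out: fd installed by the driver */
    };
    #define NVGPU_GPU_IOCTL_OPEN_CHANNEL \
            _IOR('G', 11, struct nvgpu_channel_open_args)

    int main(void)
    {
            struct nvgpu_channel_open_args args = { .channel_fd = -1 };
            int ctrl = open("/dev/nvhost-ctrl-gpu", O_RDWR);

            if (ctrl < 0 || ioctl(ctrl, NVGPU_GPU_IOCTL_OPEN_CHANNEL, &args))
                    return 1;
            printf("channel fd %d\n", args.channel_fd);
            close(args.channel_fd);
            close(ctrl);
            return 0;
    }
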
 
 /* allocate private cmd buffer.
@@ -1063,8 +1132,8 @@ static void recycle_priv_cmdbuf(struct channel_gk20a *c)
 }
 
 
-static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
-                                     struct nvhost_alloc_gpfifo_args *args)
+int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
+               struct nvgpu_alloc_gpfifo_args *args)
 {
        struct gk20a *g = c->g;
        struct device *d = dev_from_gk20a(g);
@@ -1078,22 +1147,25 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
           and another one after, for internal usage. Triple the requested size. */
        gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
 
-       if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
+       if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
                c->vpr = true;
 
-       /* an address space needs to have been bound at this point.   */
+       /* an address space needs to have been bound at this point. */
        if (!gk20a_channel_as_bound(c)) {
                gk20a_err(d,
                            "not bound to an address space at time of gpfifo"
-                           " allocation.  Attempting to create and bind to"
-                           " one...");
+                           " allocation.");
                return -EINVAL;
        }
        ch_vm = c->vm;
 
        c->cmds_pending = false;
-       gk20a_channel_fence_close(&c->last_submit.pre_fence);
-       gk20a_channel_fence_close(&c->last_submit.post_fence);
+       mutex_lock(&c->submit_lock);
+       gk20a_fence_put(c->last_submit.pre_fence);
+       gk20a_fence_put(c->last_submit.post_fence);
+       c->last_submit.pre_fence = NULL;
+       c->last_submit.post_fence = NULL;
+       mutex_unlock(&c->submit_lock);
 
        c->ramfc.offset = 0;
        c->ramfc.size = ram_in_ramfc_s() / 8;
@@ -1142,10 +1214,11 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
        gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
                c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
 
-       channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
-
        channel_gk20a_setup_userd(c);
-       channel_gk20a_commit_userd(c);
+
+       err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
+       if (err)
+               goto clean_up_unmap;
 
        /* TBD: setup engine contexts */
 
@@ -1157,6 +1230,8 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
        if (err)
                goto clean_up_unmap;
 
+       g->ops.fifo.bind_channel(c);
+
        gk20a_free_sgtable(&sgt);
 
        gk20a_dbg_fn("done");
@@ -1177,18 +1252,6 @@ clean_up:
        return err;
 }
 
-static inline int wfi_cmd_size(void)
-{
-       return 2;
-}
-void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
-{
-       /* wfi */
-       cmd->ptr[(*i)++] = 0x2001001E;
-       /* handle, ignored */
-       cmd->ptr[(*i)++] = 0x00000000;
-}
-
 static inline bool check_gp_put(struct gk20a *g,
                                struct channel_gk20a *c)
 {
@@ -1276,8 +1339,10 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
                }
        }
 
-       gk20a_channel_fence_close(&c->last_submit.pre_fence);
-       gk20a_channel_fence_close(&c->last_submit.post_fence);
+       gk20a_fence_put(c->last_submit.pre_fence);
+       gk20a_fence_put(c->last_submit.post_fence);
+       c->last_submit.pre_fence = NULL;
+       c->last_submit.post_fence = NULL;
 
        err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence);
        if (unlikely(err)) {
@@ -1285,7 +1350,7 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
                return err;
        }
 
-       WARN_ON(!c->last_submit.post_fence.wfi);
+       WARN_ON(!c->last_submit.post_fence->wfi);
 
        c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
        c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
@@ -1314,7 +1379,8 @@ static u32 get_gp_free_count(struct channel_gk20a *c)
        return gp_free_count(c);
 }
 
-static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
+static void trace_write_pushbuffer(struct channel_gk20a *c,
+                                  struct nvgpu_gpfifo *g)
 {
        void *mem = NULL;
        unsigned int words;
@@ -1350,9 +1416,21 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
        }
 }
 
+static void trace_write_pushbuffer_range(struct channel_gk20a *c,
+                                        struct nvgpu_gpfifo *g,
+                                        int count)
+{
+       if (gk20a_debug_trace_cmdbuf) {
+               int i;
+               struct nvgpu_gpfifo *gp = g;
+               for (i = 0; i < count; i++, gp++)
+                       trace_write_pushbuffer(c, gp);
+       }
+}
+
 static int gk20a_channel_add_job(struct channel_gk20a *c,
-                                struct gk20a_channel_fence *pre_fence,
-                                struct gk20a_channel_fence *post_fence)
+                                struct gk20a_fence *pre_fence,
+                                struct gk20a_fence *post_fence)
 {
        struct vm_gk20a *vm = c->vm;
        struct channel_gk20a_job *job = NULL;
@@ -1377,8 +1455,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 
        job->num_mapped_buffers = num_mapped_buffers;
        job->mapped_buffers = mapped_buffers;
-       gk20a_channel_fence_dup(pre_fence, &job->pre_fence);
-       gk20a_channel_fence_dup(post_fence, &job->post_fence);
+       job->pre_fence = gk20a_fence_get(pre_fence);
+       job->post_fence = gk20a_fence_get(post_fence);
 
        mutex_lock(&c->jobs_lock);
        list_add_tail(&job->list, &c->jobs);
@@ -1389,35 +1467,36 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 
 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 {
-       struct gk20a *g = c->g;
        struct vm_gk20a *vm = c->vm;
        struct channel_gk20a_job *job, *n;
-       int i;
+
+       trace_gk20a_channel_update(c);
 
        wake_up(&c->submit_wq);
 
        mutex_lock(&c->submit_lock);
        mutex_lock(&c->jobs_lock);
        list_for_each_entry_safe(job, n, &c->jobs, list) {
-               bool completed = WARN_ON(!c->sync) ||
-                       c->sync->is_expired(c->sync, &job->post_fence);
+               bool completed = gk20a_fence_is_expired(job->post_fence);
                if (!completed)
                        break;
 
+               c->sync->signal_timeline(c->sync);
+
                gk20a_vm_put_buffers(vm, job->mapped_buffers,
                                job->num_mapped_buffers);
 
                /* Close the fences (this will unref the semaphores and release
                 * them to the pool). */
-               gk20a_channel_fence_close(&job->pre_fence);
-               gk20a_channel_fence_close(&job->post_fence);
+               gk20a_fence_put(job->pre_fence);
+               gk20a_fence_put(job->post_fence);
 
                /* job is done. release its reference to vm */
                gk20a_vm_put(vm);
 
                list_del_init(&job->list);
                kfree(job);
-               gk20a_idle(g->dev);
+               gk20a_idle(c->g->dev);
        }
 
        /*
@@ -1428,7 +1507,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
         */
        if (list_empty(&c->jobs)) {
                if (c->sync && c->sync->aggressive_destroy &&
-                         c->sync->is_expired(c->sync, &c->last_submit.post_fence)) {
+                         gk20a_fence_is_expired(c->last_submit.post_fence)) {
                        c->sync->destroy(c->sync);
                        c->sync = NULL;
                }
@@ -1436,50 +1515,53 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
        mutex_unlock(&c->jobs_lock);
        mutex_unlock(&c->submit_lock);
 
-       for (i = 0; i < nr_completed; i++)
-               gk20a_idle(c->g->dev);
+       if (c->update_fn)
+               schedule_work(&c->update_fn_work);
 }
 
-void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
-{
-       /* syncpoint_a */
-       ptr[0] = 0x2001001C;
-       /* payload */
-       ptr[1] = thresh;
-       /* syncpoint_b */
-       ptr[2] = 0x2001001D;
-       /* syncpt_id, switch_en, wait */
-       ptr[3] = (id << 8) | 0x10;
-}
-
-static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
-                               struct nvhost_gpfifo *gpfifo,
+int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
+                               struct nvgpu_gpfifo *gpfifo,
                                u32 num_entries,
-                               struct nvhost_fence *fence,
-                               u32 flags)
+                               u32 flags,
+                               struct nvgpu_fence *fence,
+                               struct gk20a_fence **fence_out)
 {
        struct gk20a *g = c->g;
        struct device *d = dev_from_gk20a(g);
        int err = 0;
-       int i;
+       int start, end;
        int wait_fence_fd = -1;
        struct priv_cmd_entry *wait_cmd = NULL;
        struct priv_cmd_entry *incr_cmd = NULL;
-       struct gk20a_channel_fence pre_fence = { 0 };
-       struct gk20a_channel_fence post_fence = { 0 };
+       struct gk20a_fence *pre_fence = NULL;
+       struct gk20a_fence *post_fence = NULL;
        /* we might need two extra gpfifo entries - one for pre fence
         * and one for post fence. */
        const int extra_entries = 2;
-       bool need_wfi = !(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
+       bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
 
        if (c->has_timedout)
                return -ETIMEDOUT;
 
-       if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
-                     NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
+       /* fifo not large enough for request. Return error immediately */
+       if (c->gpfifo.entry_num < num_entries) {
+               gk20a_err(d, "not enough gpfifo space allocated");
+               return -ENOMEM;
+       }
+
+       if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
+                     NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
            !fence)
                return -EINVAL;
 
+       /* an address space needs to have been bound at this point. */
+       if (!gk20a_channel_as_bound(c)) {
+               gk20a_err(d,
+                           "not bound to an address space at time of gpfifo"
+                           " submission.");
+               return -EINVAL;
+       }
+
 #ifdef CONFIG_DEBUG_FS
        /* update debug settings */
        if (g->ops.ltc.sync_debugfs)
@@ -1499,7 +1581,8 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                          c->hw_chid,
                                          num_entries,
                                          flags,
-                                         fence->syncpt_id, fence->value);
+                                         fence ? fence->id : 0,
+                                         fence ? fence->value : 0);
        check_gp_put(g, c);
        update_gp_get(g, c);
 
@@ -1511,14 +1594,16 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        /* We don't know what context is currently running...                */
        /* Note also: there can be more than one context associated with the */
        /* address space (vm).   */
-       gk20a_mm_tlb_invalidate(c->vm);
+       g->ops.mm.tlb_invalidate(c->vm);
 
        /* Make sure we have enough space for gpfifo entries. If not,
         * wait for signals from completed submits */
        if (gp_free_count(c) < num_entries + extra_entries) {
+               trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
                err = wait_event_interruptible(c->submit_wq,
                        get_gp_free_count(c) >= num_entries + extra_entries ||
                        c->has_timedout);
+               trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);
        }
 
        if (c->has_timedout) {
@@ -1527,7 +1612,7 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        }
 
        if (err) {
-               gk20a_err(d, "not enough gpfifo space");
+               gk20a_err(d, "timeout waiting for gpfifo space");
                err = -EAGAIN;
                goto clean_up;
        }
@@ -1550,13 +1635,13 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
         * the only reason this isn't being unceremoniously killed is to
         * keep running some tests which trigger this condition
         */
-       if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
-               if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
-                       wait_fence_fd = fence->syncpt_id;
+       if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
+               if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+                       wait_fence_fd = fence->id;
                        err = c->sync->wait_fd(c->sync, wait_fence_fd,
                                        &wait_cmd, &pre_fence);
                } else {
-                       err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
+                       err = c->sync->wait_syncpt(c->sync, fence->id,
                                        fence->value, &wait_cmd, &pre_fence);
                }
        }
@@ -1568,18 +1653,9 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
        /* always insert syncpt increment at end of gpfifo submission
           to keep track of method completion for idle railgating */
-       if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
-                       flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
-               err = c->sync->incr_user_fd(c->sync, wait_fence_fd, &incr_cmd,
-                                           &post_fence,
-                                           need_wfi,
-                                           &fence->syncpt_id);
-       else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
-               err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
-                                               &post_fence,
-                                               need_wfi,
-                                               &fence->syncpt_id,
-                                               &fence->value);
+       if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
+               err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
+                                        &post_fence, need_wfi);
        else
                err = c->sync->incr(c->sync, &incr_cmd,
                                    &post_fence);
@@ -1604,15 +1680,34 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                wait_cmd->gp_put = c->gpfifo.put;
        }
 
-       for (i = 0; i < num_entries; i++) {
-               c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
-                       gpfifo[i].entry0; /* cmd buf va low 32 */
-               c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
-                       gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
-               trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
-               c->gpfifo.put = (c->gpfifo.put + 1) &
-                       (c->gpfifo.entry_num - 1);
+       /*
+        * Copy source gpfifo entries into the gpfifo ring buffer,
+        * potentially splitting into two memcpies to handle the
+        * ring buffer wrap-around case.
+        */
+       start = c->gpfifo.put;
+       end = start + num_entries;
+
+       if (end > c->gpfifo.entry_num) {
+               int length0 = c->gpfifo.entry_num - start;
+               int length1 = num_entries - length0;
+
+               memcpy(c->gpfifo.cpu_va + start, gpfifo,
+                      length0 * sizeof(*gpfifo));
+
+               memcpy(c->gpfifo.cpu_va, gpfifo + length0,
+                      length1 * sizeof(*gpfifo));
+
+               trace_write_pushbuffer_range(c, gpfifo, length0);
+               trace_write_pushbuffer_range(c, gpfifo + length0, length1);
+       } else {
+               memcpy(c->gpfifo.cpu_va + start, gpfifo,
+                      num_entries * sizeof(*gpfifo));
+
+               trace_write_pushbuffer_range(c, gpfifo, num_entries);
        }
+       c->gpfifo.put = (c->gpfifo.put + num_entries) &
+               (c->gpfifo.entry_num - 1);
 
        if (incr_cmd) {
                c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
@@ -1630,13 +1725,15 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                incr_cmd->gp_put = c->gpfifo.put;
        }
 
-       gk20a_channel_fence_close(&c->last_submit.pre_fence);
-       gk20a_channel_fence_close(&c->last_submit.post_fence);
+       gk20a_fence_put(c->last_submit.pre_fence);
+       gk20a_fence_put(c->last_submit.post_fence);
        c->last_submit.pre_fence = pre_fence;
        c->last_submit.post_fence = post_fence;
+       if (fence_out)
+               *fence_out = gk20a_fence_get(post_fence);
 
        /* TODO! Check for errors... */
-       gk20a_channel_add_job(c, &pre_fence, &post_fence);
+       gk20a_channel_add_job(c, pre_fence, post_fence);
 
        c->cmds_pending = true;
        gk20a_bar1_writel(g,
@@ -1649,7 +1746,8 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                             c->hw_chid,
                                             num_entries,
                                             flags,
-                                            fence->syncpt_id, fence->value);
+                                            post_fence->syncpt_id,
+                                            post_fence->syncpt_value);
 
        gk20a_dbg_info("post-submit put %d, get %d, size %d",
                c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
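
The replacement above turns the old per-entry copy loop into at most two memcpies: because gpfifo_size is rounded up to a power of two, put advances under a mask, and a request that crosses the end of the ring is split at the boundary. A standalone model of the same arithmetic (the driver has already waited for gp_free_count() to cover the request, so n never exceeds the free space):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define ENTRY_NUM 8u    /* power of two, as roundup_pow_of_two() ensures */

    struct entry { uint32_t entry0, entry1; };

    static struct entry ring[ENTRY_NUM];
    static unsigned int put;

    static void ring_copy(const struct entry *src, unsigned int n)
    {
            unsigned int end = put + n;

            assert(n <= ENTRY_NUM);
            if (end > ENTRY_NUM) {
                    /* wraps: split the copy at the end of the ring */
                    unsigned int len0 = ENTRY_NUM - put;

                    memcpy(ring + put, src, len0 * sizeof(*src));
                    memcpy(ring, src + len0, (n - len0) * sizeof(*src));
            } else {
                    memcpy(ring + put, src, n * sizeof(*src));
            }
            put = (put + n) & (ENTRY_NUM - 1);
    }
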
@@ -1661,17 +1759,12 @@ clean_up:
        gk20a_err(d, "fail");
        free_priv_cmdbuf(c, wait_cmd);
        free_priv_cmdbuf(c, incr_cmd);
-       gk20a_channel_fence_close(&pre_fence);
-       gk20a_channel_fence_close(&post_fence);
+       gk20a_fence_put(pre_fence);
+       gk20a_fence_put(post_fence);
        gk20a_idle(g->dev);
        return err;
 }
 
-void gk20a_remove_channel_support(struct channel_gk20a *c)
-{
-
-}
-
 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 {
        struct channel_gk20a *c = g->fifo.channel+chid;
@@ -1679,7 +1772,6 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
        c->in_use = false;
        c->hw_chid = chid;
        c->bound = false;
-       c->remove_support = gk20a_remove_channel_support;
        mutex_init(&c->jobs_lock);
        mutex_init(&c->submit_lock);
        INIT_LIST_HEAD(&c->jobs);
@@ -1695,7 +1787,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
 {
        int err = 0;
-       struct gk20a_channel_fence *fence = &ch->last_submit.post_fence;
+       struct gk20a_fence *fence = ch->last_submit.post_fence;
 
        if (!ch->cmds_pending)
                return 0;
@@ -1704,26 +1796,25 @@ int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
        if (ch->has_timedout)
                return -ETIMEDOUT;
 
-       if (!(fence->valid && fence->wfi)) {
+       if (!(fence && fence->wfi) && ch->obj_class != KEPLER_C) {
                gk20a_dbg_fn("issuing wfi, incr to finish the channel");
                err = gk20a_channel_submit_wfi(ch);
+               fence = ch->last_submit.post_fence;
        }
        if (err)
                return err;
 
-       BUG_ON(!(fence->valid && fence->wfi));
+       BUG_ON(!(fence && fence->wfi) && ch->obj_class != KEPLER_C);
 
        gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
-                     fence->thresh, fence->semaphore);
+                    fence->syncpt_value, fence->semaphore);
 
-       if (ch->sync) {
-               err = ch->sync->wait_cpu(ch->sync, fence, timeout);
-               if (WARN_ON(err))
-                       dev_warn(dev_from_gk20a(ch->g),
-                              "timed out waiting for gk20a channel to finish");
-               else
-                       ch->cmds_pending = false;
-       }
+       err = gk20a_fence_wait(fence, timeout);
+       if (WARN_ON(err))
+               dev_warn(dev_from_gk20a(ch->g),
+                      "timed out waiting for gk20a channel to finish");
+       else
+               ch->cmds_pending = false;
 
        return err;
 }
@@ -1776,7 +1867,7 @@ cleanup_put:
 }
 
 static int gk20a_channel_wait(struct channel_gk20a *ch,
-                             struct nvhost_wait_args *args)
+                             struct nvgpu_wait_args *args)
 {
        struct device *d = dev_from_gk20a(ch->g);
        struct dma_buf *dmabuf;
@@ -1793,14 +1884,14 @@ static int gk20a_channel_wait(struct channel_gk20a *ch,
        if (ch->has_timedout)
                return -ETIMEDOUT;
 
-       if (args->timeout == NVHOST_NO_TIMEOUT)
+       if (args->timeout == NVGPU_NO_TIMEOUT)
                timeout = MAX_SCHEDULE_TIMEOUT;
        else
                timeout = (u32)msecs_to_jiffies(args->timeout);
 
        switch (args->type) {
-       case NVHOST_WAIT_TYPE_NOTIFIER:
-               id = args->condition.notifier.nvmap_handle;
+       case NVGPU_WAIT_TYPE_NOTIFIER:
+               id = args->condition.notifier.dmabuf_fd;
                offset = args->condition.notifier.offset;
 
                dmabuf = dma_buf_get(id);
@@ -1845,9 +1936,9 @@ notif_clean_up:
                dma_buf_vunmap(dmabuf, notif);
                return ret;
 
-       case NVHOST_WAIT_TYPE_SEMAPHORE:
+       case NVGPU_WAIT_TYPE_SEMAPHORE:
                ret = gk20a_channel_wait_semaphore(ch,
-                               args->condition.semaphore.nvmap_handle,
+                               args->condition.semaphore.dmabuf_fd,
                                args->condition.semaphore.offset,
                                args->condition.semaphore.payload,
                                timeout);
@@ -1862,21 +1953,134 @@ notif_clean_up:
        return ret;
 }
 
+/* poll events for semaphores */
+
+static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
+{
+       gk20a_dbg_fn("");
+
+       mutex_lock(&ev->lock);
+
+       ev->events_enabled = true;
+       ev->num_pending_events = 0;
+
+       mutex_unlock(&ev->lock);
+}
+
+static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
+{
+       gk20a_dbg_fn("");
+
+       mutex_lock(&ev->lock);
+
+       ev->events_enabled = false;
+       ev->num_pending_events = 0;
+
+       mutex_unlock(&ev->lock);
+}
+
+static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
+{
+       gk20a_dbg_fn("");
+
+       mutex_lock(&ev->lock);
+
+       if (ev->events_enabled &&
+                       ev->num_pending_events > 0)
+               ev->num_pending_events--;
+
+       mutex_unlock(&ev->lock);
+}
+
+static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
+                         struct nvgpu_channel_events_ctrl_args *args)
+{
+       int ret = 0;
+
+       gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
+                       "channel events ctrl cmd %d", args->cmd);
+
+       switch (args->cmd) {
+       case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
+               gk20a_channel_events_enable(&ch->poll_events);
+               break;
+
+       case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
+               gk20a_channel_events_disable(&ch->poll_events);
+               break;
+
+       case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
+               gk20a_channel_events_clear(&ch->poll_events);
+               break;
+
+       default:
+               gk20a_err(dev_from_gk20a(ch->g),
+                          "unrecognized channel events ctrl cmd: 0x%x",
+                          args->cmd);
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+void gk20a_channel_event(struct channel_gk20a *ch)
+{
+       mutex_lock(&ch->poll_events.lock);
+
+       if (ch->poll_events.events_enabled) {
+               gk20a_dbg_info("posting event on channel id %d",
+                               ch->hw_chid);
+               gk20a_dbg_info("%d channel events pending",
+                               ch->poll_events.num_pending_events);
+
+               ch->poll_events.num_pending_events++;
+               /* not waking up here, caller does that */
+       }
+
+       mutex_unlock(&ch->poll_events.lock);
+}
+
+unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
+{
+       unsigned int mask = 0;
+       struct channel_gk20a *ch = filep->private_data;
+
+       gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
+
+       poll_wait(filep, &ch->semaphore_wq, wait);
+
+       mutex_lock(&ch->poll_events.lock);
+
+       if (ch->poll_events.events_enabled &&
+                       ch->poll_events.num_pending_events > 0) {
+               gk20a_dbg_info("found pending event on channel id %d",
+                               ch->hw_chid);
+               gk20a_dbg_info("%d channel events pending",
+                               ch->poll_events.num_pending_events);
+               mask = (POLLPRI | POLLIN);
+       }
+
+       mutex_unlock(&ch->poll_events.lock);
+
+       return mask;
+}
+
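
Together, the events-ctrl ioctl and gk20a_channel_poll() let user space block until a semaphore wakeup posts an event. A hypothetical caller; the ioctl number and the numeric CMD value are assumptions, only the symbolic names appear above:

    #include <poll.h>
    #include <sys/ioctl.h>

    struct nvgpu_channel_events_ctrl_args {
            unsigned int cmd;                       /* assumed layout */
    };
    #define NVGPU_IOCTL_CHANNEL_EVENTS_CTRL \
            _IOW('H', 31, struct nvgpu_channel_events_ctrl_args) /* assumption */
    #define NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE 1         /* assumption */

    static int wait_channel_event(int channel_fd)
    {
            struct nvgpu_channel_events_ctrl_args ctrl = {
                    .cmd = NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE,
            };
            struct pollfd pfd = { .fd = channel_fd,
                                  .events = POLLPRI | POLLIN };

            if (ioctl(channel_fd, NVGPU_IOCTL_CHANNEL_EVENTS_CTRL, &ctrl))
                    return -1;
            /* gk20a_channel_poll() reports POLLPRI | POLLIN once an
             * event is pending */
            return poll(&pfd, 1, -1);
    }
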
 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
                u32 priority)
 {
        u32 timeslice_timeout;
        /* set priority of graphics channel */
        switch (priority) {
-       case NVHOST_PRIORITY_LOW:
+       case NVGPU_PRIORITY_LOW:
                /* 64 << 3 = 512us */
                timeslice_timeout = 64;
                break;
-       case NVHOST_PRIORITY_MEDIUM:
+       case NVGPU_PRIORITY_MEDIUM:
                /* 128 << 3 = 1024us */
                timeslice_timeout = 128;
                break;
-       case NVHOST_PRIORITY_HIGH:
+       case NVGPU_PRIORITY_HIGH:
                /* 255 << 3 = 2048us */
                timeslice_timeout = 255;
                break;
@@ -1890,14 +2094,14 @@ static int gk20a_channel_set_priority(struct channel_gk20a *ch,
 }
 
 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
-                           struct nvhost_zcull_bind_args *args)
+                           struct nvgpu_zcull_bind_args *args)
 {
        struct gk20a *g = ch->g;
        struct gr_gk20a *gr = &g->gr;
 
        gk20a_dbg_fn("");
 
-       return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
+       return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
                                args->gpu_va, args->mode);
 }
 
@@ -1908,51 +2112,39 @@ int gk20a_channel_suspend(struct gk20a *g)
        struct fifo_gk20a *f = &g->fifo;
        u32 chid;
        bool channels_in_use = false;
-       struct device *d = dev_from_gk20a(g);
        int err;
 
        gk20a_dbg_fn("");
 
-       /* idle the engine by submitting WFI on non-KEPLER_C channel */
-       for (chid = 0; chid < f->num_channels; chid++) {
-               struct channel_gk20a *c = &f->channel[chid];
-               if (c->in_use && c->obj_class != KEPLER_C) {
-                       err = gk20a_channel_submit_wfi(c);
-                       if (err) {
-                               gk20a_err(d, "cannot idle channel %d\n",
-                                               chid);
-                               return err;
-                       }
-
-                       if (c->sync)
-                               c->sync->wait_cpu(c->sync,
-                                                 &c->last_submit.post_fence,
-                                                 500000);
-                       break;
-               }
-       }
+       /* wait for engine idle */
+       err = g->ops.fifo.wait_engine_idle(g);
+       if (err)
+               return err;
 
        for (chid = 0; chid < f->num_channels; chid++) {
-               if (f->channel[chid].in_use) {
+               struct channel_gk20a *ch = &f->channel[chid];
+               if (ch->in_use) {
 
                        gk20a_dbg_info("suspend channel %d", chid);
                        /* disable channel */
-                       gk20a_writel(g, ccsr_channel_r(chid),
-                               gk20a_readl(g, ccsr_channel_r(chid)) |
-                               ccsr_channel_enable_clr_true_f());
+                       g->ops.fifo.disable_channel(ch);
                        /* preempt the channel */
-                       gk20a_fifo_preempt_channel(g, chid);
+                       g->ops.fifo.preempt_channel(g, chid);
+                       /* wait for channel update notifiers */
+                       if (ch->update_fn &&
+                                       work_pending(&ch->update_fn_work))
+                               flush_work(&ch->update_fn_work);
 
                        channels_in_use = true;
                }
        }
 
        if (channels_in_use) {
-               gk20a_fifo_update_runlist(g, 0, ~0, false, true);
+               g->ops.fifo.update_runlist(g, 0, ~0, false, true);
 
                for (chid = 0; chid < f->num_channels; chid++) {
                        if (f->channel[chid].in_use)
-                               channel_gk20a_unbind(&f->channel[chid]);
+                               g->ops.fifo.unbind_channel(&f->channel[chid]);
                }
        }
 
@@ -1979,7 +2171,7 @@ int gk20a_channel_resume(struct gk20a *g)
        }
 
        if (channels_in_use)
-               gk20a_fifo_update_runlist(g, 0, ~0, true, true);
+               g->ops.fifo.update_runlist(g, 0, ~0, true, true);
 
        gk20a_dbg_fn("done");
        return 0;
@@ -2003,8 +2195,9 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g)
 
 static int gk20a_ioctl_channel_submit_gpfifo(
        struct channel_gk20a *ch,
-       struct nvhost_submit_gpfifo_args *args)
+       struct nvgpu_submit_gpfifo_args *args)
 {
+       struct gk20a_fence *fence_out;
        void *gpfifo;
        u32 size;
        int ret = 0;
@@ -2014,7 +2207,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
        if (ch->has_timedout)
                return -ETIMEDOUT;
 
-       size = args->num_entries * sizeof(struct nvhost_gpfifo);
+       size = args->num_entries * sizeof(struct nvgpu_gpfifo);
 
        gpfifo = kzalloc(size, GFP_KERNEL);
        if (!gpfifo)
@@ -2027,7 +2220,26 @@ static int gk20a_ioctl_channel_submit_gpfifo(
        }
 
        ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
-                                       &args->fence, args->flags);
+                                         args->flags, &args->fence,
+                                         &fence_out);
+
+       if (ret)
+               goto clean_up;
+
+       /* Convert fence_out to something we can pass back to user space. */
+       if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
+               if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+                       int fd = gk20a_fence_install_fd(fence_out);
+                       if (fd < 0)
+                               ret = fd;
+                       else
+                               args->fence.id = fd;
+               } else {
+                       args->fence.id = fence_out->syncpt_id;
+                       args->fence.value = fence_out->syncpt_value;
+               }
+       }
+       gk20a_fence_put(fence_out);
 
 clean_up:
        kfree(gpfifo);
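
When both FENCE_GET and SYNC_FENCE are set, the fd produced by gk20a_fence_install_fd() is returned in args->fence.id; otherwise a raw syncpoint id/value pair is. A user-space sketch for the sync-fd case, relying only on sync fds being pollable (they signal POLLIN once the fence expires):

    #include <poll.h>

    /* Block until the post-submit fence returned in args->fence.id
     * signals, or timeout_ms elapses. */
    static int wait_submit_fence(int sync_fd, int timeout_ms)
    {
            struct pollfd pfd = { .fd = sync_fd, .events = POLLIN };

            return poll(&pfd, 1, timeout_ms);
    }
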
@@ -2037,6 +2249,11 @@ clean_up:
 void gk20a_init_channel(struct gpu_ops *gops)
 {
        gops->fifo.bind_channel = channel_gk20a_bind;
+       gops->fifo.unbind_channel = channel_gk20a_unbind;
+       gops->fifo.disable_channel = channel_gk20a_disable;
+       gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
+       gops->fifo.free_inst = channel_gk20a_free_inst;
+       gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
 }
 
 long gk20a_channel_ioctl(struct file *filp,
@@ -2044,14 +2261,16 @@ long gk20a_channel_ioctl(struct file *filp,
 {
        struct channel_gk20a *ch = filp->private_data;
        struct platform_device *dev = ch->g->dev;
-       u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE];
+       u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
        int err = 0;
 
-       if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) ||
+       gk20a_dbg_fn("start %d", _IOC_NR(cmd));
+
+       if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
                (_IOC_NR(cmd) == 0) ||
-               (_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) ||
-               (_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE))
-               return -EFAULT;
+               (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
+               (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
+               return -EINVAL;
 
        if (_IOC_DIR(cmd) & _IOC_WRITE) {
                if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
@@ -2059,47 +2278,13 @@ long gk20a_channel_ioctl(struct file *filp,
        }
 
        switch (cmd) {
-       case NVHOST_IOCTL_CHANNEL_OPEN:
-       {
-               int fd;
-               struct file *file;
-               char *name;
-
-               err = get_unused_fd_flags(O_RDWR);
-               if (err < 0)
-                       break;
-               fd = err;
-
-               name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
-                               dev_name(&dev->dev), fd);
-               if (!name) {
-                       err = -ENOMEM;
-                       put_unused_fd(fd);
-                       break;
-               }
-
-               file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
-               kfree(name);
-               if (IS_ERR(file)) {
-                       err = PTR_ERR(file);
-                       put_unused_fd(fd);
-                       break;
-               }
-               fd_install(fd, file);
-
-               err = __gk20a_channel_open(ch->g, file);
-               if (err) {
-                       put_unused_fd(fd);
-                       fput(file);
-                       break;
-               }
-
-               ((struct nvhost_channel_open_args *)buf)->channel_fd = fd;
+       case NVGPU_IOCTL_CHANNEL_OPEN:
+               err = gk20a_channel_open_ioctl(ch->g,
+                       (struct nvgpu_channel_open_args *)buf);
                break;
-       }
-       case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD:
+       case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
                break;
-       case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
+       case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
@@ -2107,11 +2292,11 @@ long gk20a_channel_ioctl(struct file *filp,
                                __func__, cmd);
                        return err;
                }
-               err = gk20a_alloc_obj_ctx(ch,
-                               (struct nvhost_alloc_obj_ctx_args *)buf);
+               err = ch->g->ops.gr.alloc_obj_ctx(ch,
+                               (struct nvgpu_alloc_obj_ctx_args *)buf);
                gk20a_idle(dev);
                break;
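
Every case that touches hardware brackets its work with gk20a_busy()/gk20a_idle(), the driver's runtime-power reference: busy powers the GPU up (railgating permitting) and pins it, idle drops the reference. The repetition invites a wrapper; a hypothetical sketch of the shape, not present in the driver:

    /* Hypothetical wrapper: run fn with the GPU held powered. */
    static int with_gpu_busy(struct platform_device *dev,
                             struct channel_gk20a *ch, void *buf,
                             int (*fn)(struct channel_gk20a *, void *))
    {
            int err = gk20a_busy(dev);

            if (err)
                    return err;
            err = fn(ch, buf);
            gk20a_idle(dev);
            return err;
    }
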
-       case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX:
+       case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
@@ -2119,11 +2304,11 @@ long gk20a_channel_ioctl(struct file *filp,
                                __func__, cmd);
                        return err;
                }
-               err = gk20a_free_obj_ctx(ch,
-                               (struct nvhost_free_obj_ctx_args *)buf);
+               err = ch->g->ops.gr.free_obj_ctx(ch,
+                               (struct nvgpu_free_obj_ctx_args *)buf);
                gk20a_idle(dev);
                break;
-       case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO:
+       case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
@@ -2132,14 +2317,14 @@ long gk20a_channel_ioctl(struct file *filp,
                        return err;
                }
                err = gk20a_alloc_channel_gpfifo(ch,
-                               (struct nvhost_alloc_gpfifo_args *)buf);
+                               (struct nvgpu_alloc_gpfifo_args *)buf);
                gk20a_idle(dev);
                break;
-       case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO:
+       case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
                err = gk20a_ioctl_channel_submit_gpfifo(ch,
-                               (struct nvhost_submit_gpfifo_args *)buf);
+                               (struct nvgpu_submit_gpfifo_args *)buf);
                break;
-       case NVHOST_IOCTL_CHANNEL_WAIT:
+       case NVGPU_IOCTL_CHANNEL_WAIT:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
@@ -2148,10 +2333,10 @@ long gk20a_channel_ioctl(struct file *filp,
                        return err;
                }
                err = gk20a_channel_wait(ch,
-                               (struct nvhost_wait_args *)buf);
+                               (struct nvgpu_wait_args *)buf);
                gk20a_idle(dev);
                break;
-       case NVHOST_IOCTL_CHANNEL_ZCULL_BIND:
+       case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
@@ -2160,10 +2345,10 @@ long gk20a_channel_ioctl(struct file *filp,
                        return err;
                }
                err = gk20a_channel_zcull_bind(ch,
-                               (struct nvhost_zcull_bind_args *)buf);
+                               (struct nvgpu_zcull_bind_args *)buf);
                gk20a_idle(dev);
                break;
-       case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
+       case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
@@ -2172,11 +2357,11 @@ long gk20a_channel_ioctl(struct file *filp,
                        return err;
                }
                err = gk20a_init_error_notifier(ch,
-                               (struct nvhost_set_error_notifier *)buf);
+                               (struct nvgpu_set_error_notifier *)buf);
                gk20a_idle(dev);
                break;
 #ifdef CONFIG_GK20A_CYCLE_STATS
-       case NVHOST_IOCTL_CHANNEL_CYCLE_STATS:
+       case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
@@ -2185,37 +2370,37 @@ long gk20a_channel_ioctl(struct file *filp,
                        return err;
                }
                err = gk20a_channel_cycle_stats(ch,
-                               (struct nvhost_cycle_stats_args *)buf);
+                               (struct nvgpu_cycle_stats_args *)buf);
                gk20a_idle(dev);
                break;
 #endif
-       case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
+       case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
        {
                u32 timeout =
-                       (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
+                       (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
                gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
                           timeout, ch->hw_chid);
                ch->timeout_ms_max = timeout;
                break;
        }
-       case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX:
+       case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
        {
                u32 timeout =
-                       (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
+                       (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
                bool timeout_debug_dump = !((u32)
-                       ((struct nvhost_set_timeout_ex_args *)buf)->flags &
-                       (1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP));
+                       ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
+                       (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
                gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
                           timeout, ch->hw_chid);
                ch->timeout_ms_max = timeout;
                ch->timeout_debug_dump = timeout_debug_dump;
                break;
        }
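
SET_TIMEOUT_EX reads the timeout through the plain nvgpu_set_timeout_args cast (safe because timeout is the leading member of both layouts) and derives timeout_debug_dump from the DISABLE_DUMP bit: the dump stays enabled unless userspace sets that flag. A hypothetical userspace helper arming a watchdog with the dump suppressed:

    /* Hypothetical helper: arm a channel watchdog without the debug dump. */
    static int set_quiet_timeout(int ch_fd, unsigned int ms)
    {
            struct nvgpu_set_timeout_ex_args targs = {
                    .timeout = ms,
                    .flags   = 1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP,
            };

            return ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX,
                         &targs);
    }
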
-       case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
-               ((struct nvhost_get_param_args *)buf)->value =
+       case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
+               ((struct nvgpu_get_param_args *)buf)->value =
                        ch->has_timedout;
                break;
-       case NVHOST_IOCTL_CHANNEL_SET_PRIORITY:
+       case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
                err = gk20a_busy(dev);
                if (err) {
                        dev_err(&dev->dev,
@@ -2224,11 +2409,65 @@ long gk20a_channel_ioctl(struct file *filp,
                        return err;
                }
                gk20a_channel_set_priority(ch,
-                       ((struct nvhost_set_priority_args *)buf)->priority);
+                       ((struct nvgpu_set_priority_args *)buf)->priority);
+               gk20a_idle(dev);
+               break;
+       case NVGPU_IOCTL_CHANNEL_ENABLE:
+               err = gk20a_busy(dev);
+               if (err) {
+                       dev_err(&dev->dev,
+                               "%s: failed to host gk20a for ioctl cmd: 0x%x",
+                               __func__, cmd);
+                       return err;
+               }
+               /* enable channel */
+               gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+                       gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
+                       ccsr_channel_enable_set_true_f());
                gk20a_idle(dev);
                break;
+       case NVGPU_IOCTL_CHANNEL_DISABLE:
+               err = gk20a_busy(dev);
+               if (err) {
+                       dev_err(&dev->dev,
+                               "%s: failed to host gk20a for ioctl cmd: 0x%x",
+                               __func__, cmd);
+                       return err;
+               }
+               /* disable channel */
+               gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+                       gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
+                       ccsr_channel_enable_clr_true_f());
+               gk20a_idle(dev);
+               break;
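
The new ENABLE and DISABLE cases are the same read-modify-write on the channel's CCSR register, differing only in which self-clearing control field gets set. A hypothetical common helper, sketched from the two cases above (not how the driver actually factors it):

    static void channel_set_enabled_sketch(struct channel_gk20a *ch,
                                           bool enable)
    {
            u32 v = gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid));

            v |= enable ? ccsr_channel_enable_set_true_f()
                        : ccsr_channel_enable_clr_true_f();
            gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid), v);
    }
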
+       case NVGPU_IOCTL_CHANNEL_PREEMPT:
+               err = gk20a_busy(dev);
+               if (err) {
+                       dev_err(&dev->dev,
+                               "%s: failed to host gk20a for ioctl cmd: 0x%x",
+                               __func__, cmd);
+                       return err;
+               }
+               err = gk20a_fifo_preempt(ch->g, ch);
+               gk20a_idle(dev);
+               break;
+       case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
+               err = gk20a_busy(dev);
+               if (err) {
+                       dev_err(&dev->dev,
+                               "%s: failed to host gk20a for ioctl cmd: 0x%x",
+                               __func__, cmd);
+                       return err;
+               }
+               err = gk20a_fifo_force_reset_ch(ch, true);
+               gk20a_idle(dev);
+               break;
+       case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
+               err = gk20a_channel_events_ctrl(ch,
+                          (struct nvgpu_channel_events_ctrl_args *)buf);
+               break;
        default:
-               dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
+               dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
                err = -ENOTTY;
                break;
        }
@@ -2236,5 +2475,7 @@ long gk20a_channel_ioctl(struct file *filp,
        if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
                err = copy_to_user((void __user *)arg, buf,
                                   _IOC_SIZE(cmd)) ? -EFAULT : 0;
 
+       gk20a_dbg_fn("end");
+
        return err;
 }
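
The tail of the dispatcher copies buf back for _IOC_READ ioctls; since copy_to_user() returns the count of bytes left uncopied, the ?: above folds a partial copy into -EFAULT rather than leaking a positive count to the syscall return. A hypothetical userspace read through that path, using the GET_TIMEDOUT query handled earlier:

    /* Hypothetical query: has this channel timed out? The value is
     * copied back because the ioctl number encodes _IOC_READ. */
    static int channel_timed_out(int ch_fd)
    {
            struct nvgpu_get_param_args gargs = { 0 };

            if (ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT, &gargs))
                    return -1;
            return gargs.value != 0;
    }

A caller seeing a nonzero result would typically tear the channel down or, with this patch, issue NVGPU_IOCTL_CHANNEL_FORCE_RESET.
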