gpu: nvgpu: add open channel ioctl to ctrl node
[linux-3.10.git] / drivers / gpu / nvgpu / gk20a / channel_gk20a.c
index cffac38..d61656f 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * GK20A Graphics channel
  *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -87,41 +87,13 @@ static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
 
 int channel_gk20a_commit_va(struct channel_gk20a *c)
 {
-       u64 addr;
-       u32 addr_lo;
-       u32 addr_hi;
-       void *inst_ptr;
-
        gk20a_dbg_fn("");
 
-       inst_ptr = c->inst_block.cpuva;
-       if (!inst_ptr)
+       if (!c->inst_block.cpuva)
                return -ENOMEM;
 
-       addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
-       addr_lo = u64_lo32(addr >> 12);
-       addr_hi = u64_hi32(addr);
-
-       gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
-                  (u64)addr, addr_lo, addr_hi);
-
-       gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
-               ram_in_page_dir_base_target_vid_mem_f() |
-               ram_in_page_dir_base_vol_true_f() |
-               ram_in_page_dir_base_lo_f(addr_lo));
-
-       gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
-               ram_in_page_dir_base_hi_f(addr_hi));
-
-       gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
-                u64_lo32(c->vm->va_limit) | 0xFFF);
-
-       gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
-               ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
-
-       if (c->g->ops.mm.set_big_page_size)
-               c->g->ops.mm.set_big_page_size(c->g, inst_ptr,
-                                              c->vm->gmmu_page_sizes[gmmu_page_size_big]);
+       gk20a_init_inst_block(&c->inst_block, c->vm,
+                       c->vm->gmmu_page_sizes[gmmu_page_size_big]);
 
        return 0;
 }
@@ -341,54 +313,24 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
 
 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
-       struct device *d = dev_from_gk20a(g);
-       int err = 0;
-       dma_addr_t iova;
+       int err;
 
        gk20a_dbg_fn("");
 
-       ch->inst_block.size = ram_in_alloc_size_v();
-       ch->inst_block.cpuva = dma_alloc_coherent(d,
-                                       ch->inst_block.size,
-                                       &iova,
-                                       GFP_KERNEL);
-       if (!ch->inst_block.cpuva) {
-               gk20a_err(d, "%s: memory allocation failed\n", __func__);
-               err = -ENOMEM;
-               goto clean_up;
-       }
-
-       ch->inst_block.iova = iova;
-       ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
-                                                       ch->inst_block.iova);
-       if (!ch->inst_block.cpu_pa) {
-               gk20a_err(d, "%s: failed to get physical address\n", __func__);
-               err = -ENOMEM;
-               goto clean_up;
-       }
+       err = gk20a_alloc_inst_block(g, &ch->inst_block);
+       if (err)
+               return err;
 
        gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
                ch->hw_chid, (u64)ch->inst_block.cpu_pa);
 
        gk20a_dbg_fn("done");
        return 0;
-
-clean_up:
-       gk20a_err(d, "fail");
-       g->ops.fifo.free_inst(g, ch);
-       return err;
 }
 
 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
-       struct device *d = dev_from_gk20a(g);
-
-       if (ch->inst_block.cpuva)
-               dma_free_coherent(d, ch->inst_block.size,
-                               ch->inst_block.cpuva, ch->inst_block.iova);
-       ch->inst_block.cpuva = NULL;
-       ch->inst_block.iova = 0;
-       memset(&ch->inst_block, 0, sizeof(struct inst_desc));
+       gk20a_free_inst_block(g, &ch->inst_block);
 }
 
 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
@@ -419,8 +361,10 @@ void gk20a_channel_abort(struct channel_gk20a *ch)
        bool released_job_semaphore = false;
 
        /* ensure no fences are pending */
+       mutex_lock(&ch->submit_lock);
        if (ch->sync)
                ch->sync->set_min_eq_max(ch->sync);
+       mutex_unlock(&ch->submit_lock);
 
        /* release all job semaphores (applies only to jobs that use
           semaphore synchronization) */
@@ -797,7 +741,6 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 
                return NULL;
        }
-       g->ops.fifo.bind_channel(ch);
        ch->pid = current->pid;
 
        /* By default, channel is regular (non-TSG) channel */
@@ -868,6 +811,48 @@ int gk20a_channel_open(struct inode *inode, struct file *filp)
        return ret;
 }
 
+/* Open a channel on a new anon-inode file; fd returned in args->channel_fd. */
+int gk20a_channel_open_ioctl(struct gk20a *g,
+               struct nvgpu_channel_open_args *args)
+{
+       struct file *file;
+       char *name;
+       int err, fd;
+
+       fd = get_unused_fd_flags(O_RDWR);
+       if (fd < 0)
+               return fd;
+
+       name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
+                       dev_name(&g->dev->dev), fd);
+       if (!name) {
+               err = -ENOMEM;
+               goto clean_up;
+       }
+
+       file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
+       kfree(name);
+       if (IS_ERR(file)) {
+               err = PTR_ERR(file);
+               goto clean_up;
+       }
+
+       /* Open before fd_install(): an installed fd cannot be taken back. */
+       err = __gk20a_channel_open(g, file);
+       if (err)
+               goto clean_up_file;
+
+       fd_install(fd, file);
+       args->channel_fd = fd;
+       return 0;
+
+clean_up_file:
+       fput(file);
+clean_up:
+       put_unused_fd(fd);
+       return err;
+}
+
 /* allocate private cmd buffer.
    used for inserting commands before/after user submitted buffers. */
 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
@@ -1245,6 +1230,8 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
        if (err)
                goto clean_up_unmap;
 
+       g->ops.fifo.bind_channel(c);
+
        gk20a_free_sgtable(&sgt);
 
        gk20a_dbg_fn("done");
@@ -1392,7 +1379,8 @@ static u32 get_gp_free_count(struct channel_gk20a *c)
        return gp_free_count(c);
 }
 
-static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
+static void trace_write_pushbuffer(struct channel_gk20a *c,
+                                  struct nvgpu_gpfifo *g)
 {
        void *mem = NULL;
        unsigned int words;
@@ -1428,6 +1416,18 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
        }
 }
 
+/* Call trace_write_pushbuffer() on each of 'count' entries starting at g. */
+static void trace_write_pushbuffer_range(struct channel_gk20a *c,
+                                        struct nvgpu_gpfifo *g, int count)
+{
+       int idx;
+
+       if (!gk20a_debug_trace_cmdbuf)
+               return;
+       for (idx = 0; idx < count; idx++)
+               trace_write_pushbuffer(c, g + idx);
+}
+
 static int gk20a_channel_add_job(struct channel_gk20a *c,
                                 struct gk20a_fence *pre_fence,
                                 struct gk20a_fence *post_fence)
@@ -1529,7 +1529,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        struct gk20a *g = c->g;
        struct device *d = dev_from_gk20a(g);
        int err = 0;
-       int i;
+       int start, end;
        int wait_fence_fd = -1;
        struct priv_cmd_entry *wait_cmd = NULL;
        struct priv_cmd_entry *incr_cmd = NULL;
@@ -1543,6 +1543,12 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        if (c->has_timedout)
                return -ETIMEDOUT;
 
+       /* fifo not large enough for request. Return error immediately */
+       if (c->gpfifo.entry_num < num_entries) {
+               gk20a_err(d, "not enough gpfifo space allocated");
+               return -ENOMEM;
+       }
+
        if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
                      NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
            !fence)
@@ -1593,9 +1599,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        /* Make sure we have enough space for gpfifo entries. If not,
         * wait for signals from completed submits */
        if (gp_free_count(c) < num_entries + extra_entries) {
+               trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
                err = wait_event_interruptible(c->submit_wq,
                        get_gp_free_count(c) >= num_entries + extra_entries ||
                        c->has_timedout);
+               trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);
        }
 
        if (c->has_timedout) {
@@ -1604,7 +1612,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        }
 
        if (err) {
-               gk20a_err(d, "not enough gpfifo space");
+               gk20a_err(d, "timeout waiting for gpfifo space");
                err = -EAGAIN;
                goto clean_up;
        }
@@ -1672,15 +1680,34 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                wait_cmd->gp_put = c->gpfifo.put;
        }
 
-       for (i = 0; i < num_entries; i++) {
-               c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
-                       gpfifo[i].entry0; /* cmd buf va low 32 */
-               c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
-                       gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
-               trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
-               c->gpfifo.put = (c->gpfifo.put + 1) &
-                       (c->gpfifo.entry_num - 1);
+       /*
+        * Copy source gpfifo entries into the gpfifo ring buffer,
+        * potentially splitting into two memcpies to handle the
+        * ring buffer wrap-around case.
+        */
+       start = c->gpfifo.put;
+       end = start + num_entries;
+
+       if (end > c->gpfifo.entry_num) {
+               int length0 = c->gpfifo.entry_num - start;
+               int length1 = num_entries - length0;
+
+               memcpy(c->gpfifo.cpu_va + start, gpfifo,
+                      length0 * sizeof(*gpfifo));
+
+               memcpy(c->gpfifo.cpu_va, gpfifo + length0,
+                      length1 * sizeof(*gpfifo));
+
+               trace_write_pushbuffer_range(c, gpfifo, length0);
+               trace_write_pushbuffer_range(c, gpfifo + length0, length1);
+       } else {
+               memcpy(c->gpfifo.cpu_va + start, gpfifo,
+                      num_entries * sizeof(*gpfifo));
+
+               trace_write_pushbuffer_range(c, gpfifo, num_entries);
        }
+       c->gpfifo.put = (c->gpfifo.put + num_entries) &
+               (c->gpfifo.entry_num - 1);
 
        if (incr_cmd) {
                c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
@@ -2252,43 +2279,9 @@ long gk20a_channel_ioctl(struct file *filp,
 
        switch (cmd) {
        case NVGPU_IOCTL_CHANNEL_OPEN:
-       {
-               int fd;
-               struct file *file;
-               char *name;
-
-               err = get_unused_fd_flags(O_RDWR);
-               if (err < 0)
-                       break;
-               fd = err;
-
-               name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
-                               dev_name(&dev->dev), fd);
-               if (!name) {
-                       err = -ENOMEM;
-                       put_unused_fd(fd);
-                       break;
-               }
-
-               file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
-               kfree(name);
-               if (IS_ERR(file)) {
-                       err = PTR_ERR(file);
-                       put_unused_fd(fd);
-                       break;
-               }
-               fd_install(fd, file);
-
-               err = __gk20a_channel_open(ch->g, file);
-               if (err) {
-                       put_unused_fd(fd);
-                       fput(file);
-                       break;
-               }
-
-               ((struct nvgpu_channel_open_args *)buf)->channel_fd = fd;
+               err = gk20a_channel_open_ioctl(ch->g,
+                       (struct nvgpu_channel_open_args *)buf);
                break;
-       }
        case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
                break;
        case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX: