gpu: nvgpu: Allow suppressing WFI on submit
Terje Bergstrom [Tue, 1 Apr 2014 05:28:44 +0000 (08:28 +0300)]
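Add the NVHOST_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI submit flag. When user
space sets it on a submit that requests a fence back, the wait-for-idle
(WFI) that would otherwise precede the fence increment is suppressed.
The flag is passed down as a new "wfi" argument to the incr_user_syncpt
and incr_user_fd channel sync operations.

Also switch the tracing of the wait and increment command buffers from
trace_write_pushbuffer() to trace_gk20a_push_cmdbuf().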

Bug 1491545

Change-Id: Ic3d061bb4f116cf7ea68dbd6a1b2ace9f11d0ab5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/390457
Reviewed-on: http://git-master/r/671029
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sibashis Mohapatra <sibashism@nvidia.com>
Tested-by: Sibashis Mohapatra <sibashism@nvidia.com>
Reviewed-by: Yogesh Kini <ykini@nvidia.com>
Reviewed-by: Winnie Hsu <whsu@nvidia.com>

drivers/gpu/nvgpu/gk20a/channel_gk20a.c
drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
include/linux/nvhost_ioctl.h

index f145b66..07137c2 100644 (file)
@@ -1413,6 +1413,7 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        /* we might need two extra gpfifo entries - one for pre fence
         * and one for post fence. */
        const int extra_entries = 2;
+       bool need_wfi = !(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
 
        if (c->has_timedout)
                return -ETIMEDOUT;
@@ -1505,10 +1506,12 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                        flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
                err = c->sync->incr_user_fd(c->sync, &incr_cmd,
                                            &c->last_submit_fence,
+                                           need_wfi,
                                            &fence->syncpt_id);
        else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
                err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
                                                &c->last_submit_fence,
+                                               need_wfi,
                                                &fence->syncpt_id,
                                                &fence->value);
        else
@@ -1523,7 +1526,8 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
                        u64_hi32(wait_cmd->gva) |
                        pbdma_gp_entry1_length_f(wait_cmd->size);
-               trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+               trace_gk20a_push_cmdbuf(c->g->dev->name,
+                       0, wait_cmd->size, 0, wait_cmd->ptr);
 
                c->gpfifo.put = (c->gpfifo.put + 1) &
                        (c->gpfifo.entry_num - 1);
@@ -1548,7 +1552,8 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
                        u64_hi32(incr_cmd->gva) |
                        pbdma_gp_entry1_length_f(incr_cmd->size);
-               trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+               trace_gk20a_push_cmdbuf(c->g->dev->name,
+                       0, incr_cmd->size, 0, incr_cmd->ptr);
 
                c->gpfifo.put = (c->gpfifo.put + 1) &
                        (c->gpfifo.entry_num - 1);
index a8f5783..952087e 100644 (file)
@@ -261,6 +261,7 @@ int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
                                          struct priv_cmd_entry **entry,
                                          struct gk20a_channel_fence *fence,
+                                         bool wfi,
                                          u32 *id, u32 *thresh)
 {
        struct gk20a_channel_syncpt *sp =
@@ -268,8 +269,10 @@ int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
        /* Need to do 'host incr + wfi' or 'gfx incr' since we return the fence
         * to user space. */
        int err = __gk20a_channel_syncpt_incr(s,
-                       sp->c->obj_class == KEPLER_C /* use gfx class? */,
-                       sp->c->obj_class != KEPLER_C /* wfi if host class */,
+                       wfi &&
+                         sp->c->obj_class == KEPLER_C /* use gfx class? */,
+                       wfi &&
+                         sp->c->obj_class != KEPLER_C /* wfi if host class */,
                        true /* register irq */,
                        entry, fence);
        if (err)
@@ -282,6 +285,7 @@ int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
 int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
                                      struct priv_cmd_entry **entry,
                                      struct gk20a_channel_fence *fence,
+                                     bool wfi,
                                      int *fd)
 {
 #ifdef CONFIG_SYNC
@@ -289,7 +293,7 @@ int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
        struct nvhost_ctrl_sync_fence_info pt;
        struct gk20a_channel_syncpt *sp =
                container_of(s, struct gk20a_channel_syncpt, ops);
-       err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence,
+       err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence, wfi,
                                                    &pt.id, &pt.thresh);
        if (err)
                return err;
index 80f38b2..90b61bf 100644 (file)
@@ -77,6 +77,7 @@ struct gk20a_channel_sync {
        int (*incr_user_syncpt)(struct gk20a_channel_sync *s,
                                struct priv_cmd_entry **entry,
                                struct gk20a_channel_fence *fence,
+                               bool wfi,
                                u32 *id, u32 *thresh);
 
        /* Increment syncpoint/semaphore, so that the returned fence represents
@@ -89,6 +90,7 @@ struct gk20a_channel_sync {
        int (*incr_user_fd)(struct gk20a_channel_sync *s,
                            struct priv_cmd_entry **entry,
                            struct gk20a_channel_fence *fence,
+                           bool wfi,
                            int *fd);
 
        /* Reset the channel syncpoint/semaphore. */
index 4b6e1a2..b060864 100644 (file)
@@ -143,6 +143,8 @@ struct nvhost_fence {
 #define NVHOST_SUBMIT_GPFIFO_FLAGS_HW_FORMAT   BIT(2)
 /* create a sync fence fd instead of raw fence */
 #define NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE  BIT(3)
+/* suppress WFI before fence trigger */
+#define NVHOST_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI        BIT(4)
 
 struct nvhost_submit_gpfifo_args {
        __u64 gpfifo;