gpu: nvgpu: create sync_fence only if needed
Deepak Nibade [Wed, 7 Oct 2015 10:50:07 +0000 (15:50 +0530)]
Currently, we create a sync_fence (via nvhost_sync_create_fence())
for every submit, but not all submits request a sync_fence.

Also, the nvhost_sync_create_fence() API takes about 1/3rd of the total
submit path time.

Hence, to optimize, allocate the sync_fence only when the user
explicitly asks for it by setting both flags
(NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE).

Also, in the CDE path from gk20a_prepare_compressible_read(),
we reuse the existing fence stored in "state", which can
result in not returning a sync_fence_fd when the user asked
for it.
Hence, force allocation of the sync_fence when the job submission
comes from the CDE path.

Bug 200141116

Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/818190
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>

drivers/gpu/nvgpu/gk20a/cde_gk20a.c
drivers/gpu/nvgpu/gk20a/channel_gk20a.c
drivers/gpu/nvgpu/gk20a/channel_gk20a.h
drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
drivers/gpu/nvgpu/gk20a/fence_gk20a.c
drivers/gpu/nvgpu/gk20a/fence_gk20a.h

index b3f3d66..ffe6a1e 100644 (file)
@@ -718,7 +718,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
        }
 
        return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
-                                          num_entries, flags, fence, fence_out);
+                                  num_entries, flags, fence, fence_out, true);
 }
 
 static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
index 99fc17c..fd94667 100644 (file)
@@ -1632,7 +1632,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                u32 num_entries,
                                u32 flags,
                                struct nvgpu_fence *fence,
-                               struct gk20a_fence **fence_out)
+                               struct gk20a_fence **fence_out,
+                               bool force_need_sync_fence)
 {
        struct gk20a *g = c->g;
        struct device *d = dev_from_gk20a(g);
@@ -1649,6 +1650,14 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
        bool skip_buffer_refcounting = (flags &
                        NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
+       bool need_sync_fence = false;
+
+       /*
+        * If user wants to allocate sync_fence_fd always, then respect that;
+        * otherwise, allocate sync_fence_fd based on user flags only
+        */
+       if (force_need_sync_fence)
+               need_sync_fence = true;
 
        if (c->has_timedout)
                return -ETIMEDOUT;
@@ -1769,15 +1778,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                goto clean_up;
        }
 
+       if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
+                       (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
+               need_sync_fence = true;
 
        /* always insert syncpt increment at end of gpfifo submission
           to keep track of method completion for idle railgating */
        if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
                err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
-                                        &post_fence, need_wfi);
+                                &post_fence, need_wfi, need_sync_fence);
        else
                err = c->sync->incr(c->sync, &incr_cmd,
-                                   &post_fence);
+                                   &post_fence, need_sync_fence);
        if (err) {
                mutex_unlock(&c->submit_lock);
                goto clean_up;
@@ -2376,7 +2388,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 
        ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
                                          args->flags, &args->fence,
-                                         &fence_out);
+                                         &fence_out, false);
 
        if (ret)
                goto clean_up;
index 576040f..ab560f3 100644 (file)
@@ -243,7 +243,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                u32 num_entries,
                                u32 flags,
                                struct nvgpu_fence *fence,
-                               struct gk20a_fence **fence_out);
+                               struct gk20a_fence **fence_out,
+                               bool force_need_sync_fence);
 
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
                               struct nvgpu_alloc_gpfifo_args *args);
index 4eea721..6c82653 100644 (file)
@@ -165,7 +165,8 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
                                       bool wfi_cmd,
                                       bool register_irq,
                                       struct priv_cmd_entry **entry,
-                                      struct gk20a_fence **fence)
+                                      struct gk20a_fence **fence,
+                                      bool need_sync_fence)
 {
        u32 thresh;
        int incr_cmd_size;
@@ -238,7 +239,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
        }
 
        *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
-                                        wfi_cmd);
+                                        wfi_cmd, need_sync_fence);
        *entry = incr_cmd;
        return 0;
 }
@@ -250,33 +251,35 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
        return __gk20a_channel_syncpt_incr(s,
                        true /* wfi */,
                        false /* no irq handler */,
-                       entry, fence);
+                       entry, fence, true);
 }
 
 static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
                              struct priv_cmd_entry **entry,
-                             struct gk20a_fence **fence)
+                             struct gk20a_fence **fence,
+                             bool need_sync_fence)
 {
        /* Don't put wfi cmd to this one since we're not returning
         * a fence to user space. */
        return __gk20a_channel_syncpt_incr(s,
                        false /* no wfi */,
                        true /* register irq */,
-                       entry, fence);
+                       entry, fence, need_sync_fence);
 }
 
 static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
                                   int wait_fence_fd,
                                   struct priv_cmd_entry **entry,
                                   struct gk20a_fence **fence,
-                                  bool wfi)
+                                  bool wfi,
+                                  bool need_sync_fence)
 {
        /* Need to do 'wfi + host incr' since we return the fence
         * to user space. */
        return __gk20a_channel_syncpt_incr(s,
                        wfi,
                        true /* register irq */,
-                       entry, fence);
+                       entry, fence, need_sync_fence);
 }
 
 static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
@@ -512,7 +515,8 @@ static int __gk20a_channel_semaphore_incr(
                struct gk20a_channel_sync *s, bool wfi_cmd,
                struct sync_fence *dependency,
                struct priv_cmd_entry **entry,
-               struct gk20a_fence **fence)
+               struct gk20a_fence **fence,
+               bool need_sync_fence)
 {
        u64 va;
        int incr_cmd_size;
@@ -559,18 +563,19 @@ static int gk20a_channel_semaphore_incr_wfi(
        return __gk20a_channel_semaphore_incr(s,
                        true /* wfi */,
                        NULL,
-                       entry, fence);
+                       entry, fence, true);
 }
 
 static int gk20a_channel_semaphore_incr(
                struct gk20a_channel_sync *s,
                struct priv_cmd_entry **entry,
-               struct gk20a_fence **fence)
+               struct gk20a_fence **fence,
+               bool need_sync_fence)
 {
        /* Don't put wfi cmd to this one since we're not returning
         * a fence to user space. */
        return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
-                                             NULL, entry, fence);
+                                     NULL, entry, fence, need_sync_fence);
 }
 
 static int gk20a_channel_semaphore_incr_user(
@@ -578,7 +583,8 @@ static int gk20a_channel_semaphore_incr_user(
                int wait_fence_fd,
                struct priv_cmd_entry **entry,
                struct gk20a_fence **fence,
-               bool wfi)
+               bool wfi,
+               bool need_sync_fence)
 {
 #ifdef CONFIG_SYNC
        struct sync_fence *dependency = NULL;
@@ -591,7 +597,7 @@ static int gk20a_channel_semaphore_incr_user(
        }
 
        err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
-                                            entry, fence);
+                                            entry, fence, need_sync_fence);
        if (err) {
                if (dependency)
                        sync_fence_put(dependency);
index a347cba..618e1b2 100644 (file)
@@ -3,7 +3,7 @@
  *
  * GK20A Channel Synchronization Abstraction
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -54,7 +54,8 @@ struct gk20a_channel_sync {
         */
        int (*incr)(struct gk20a_channel_sync *s,
                    struct priv_cmd_entry **entry,
-                   struct gk20a_fence **fence);
+                   struct gk20a_fence **fence,
+                   bool need_sync_fence);
 
        /* Increment syncpoint/semaphore, preceded by a wfi.
         * Returns
@@ -76,7 +77,8 @@ struct gk20a_channel_sync {
                         int wait_fence_fd,
                         struct priv_cmd_entry **entry,
                         struct gk20a_fence **fence,
-                        bool wfi);
+                        bool wfi,
+                        bool need_sync_fence);
 
        /* Reset the channel syncpoint/semaphore. */
        void (*set_min_eq_max)(struct gk20a_channel_sync *s);
index 54a288c..ae19d36 100644 (file)
@@ -194,7 +194,8 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
 };
 
 struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
-                                           u32 id, u32 value, bool wfi)
+                                           u32 id, u32 value, bool wfi,
+                                           bool need_sync_fence)
 {
        struct gk20a_fence *f;
        struct sync_fence *sync_fence = NULL;
@@ -205,10 +206,12 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
                .thresh = value
        };
 
-       sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
-                                             "fence");
-       if (IS_ERR(sync_fence))
-               return NULL;
+       if (need_sync_fence) {
+               sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
+                                                     "fence");
+               if (IS_ERR(sync_fence))
+                       return NULL;
+       }
 #endif
 
        f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
index 629dc69..75e135e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * GK20A Fences
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -56,7 +56,8 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
 
 struct gk20a_fence *gk20a_fence_from_syncpt(
                struct platform_device *host1x_pdev,
-               u32 id, u32 value, bool wfi);
+               u32 id, u32 value, bool wfi,
+               bool need_sync_fence);
 
 /* Fence operations */
 void gk20a_fence_put(struct gk20a_fence *f);