gpu: nvgpu: Implement ELPG flush for gm20b
Terje Bergstrom [Fri, 25 Apr 2014 12:00:54 +0000 (15:00 +0300)]
ELPG flush is initiated from a common broadcast register, but must be
waited on via per-L2 registers. Split gk20a and gm20b versions of
the flush.

Change-Id: I75c2d65e8da311b50d35bee70308b60464ec2d4d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/401545
Reviewed-by: Automatic_Commit_Validation_User

drivers/gpu/nvgpu/gk20a/ltc_common.c
drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
drivers/gpu/nvgpu/gm20b/ltc_gm20b.c

index ac46a9a..7247798 100644 (file)
@@ -313,37 +313,3 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
                            0, max_comptag_lines - 1);
 
 }
-
-/* Flushes the compression bit cache as well as "data": kicks the flush via
- * the broadcast g_elpg register, then polls LTC0's register until it is done.
- * Note: the name here is a bit of a misnomer.  ELPG uses this internally...
- * but ELPG doesn't have to be on to do it manually. */
-static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
-{
-       u32 data;
-       s32 retry = 100; /* poll budget; one is spent per still-pending read */
-
-       gk20a_dbg_fn("");
-
-       /* Make sure all previous writes are committed to the L2. There's no
-          guarantee that the writes have reached DRAM. This will be a
-          sysmembar internal to the L2. */
-       gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
-                    ltc_ltcs_ltss_g_elpg_flush_pending_f());
-       do {
-               data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
-
-               if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
-                   ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
-                       gk20a_dbg_info("g_elpg_flush 0x%x", data);
-                       retry--;
-                       usleep_range(20, 40);
-               } else
-                       break; /* flush field no longer reads as pending */
-       } while (retry >= 0 || !tegra_platform_is_silicon());
-
-       if (retry < 0) /* budget spent (off silicon polling went on regardless) */
-               gk20a_warn(dev_from_gk20a(g),
-                           "g_elpg_flush too many retries");
-
-}
index c1ba2ae..9f5317f 100644 (file)
@@ -212,6 +212,40 @@ void gk20a_ltc_isr(struct gk20a *g)
        gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+/* Flushes the compression bit cache as well as "data": kicks the flush via
+ * the broadcast g_elpg register, then polls LTC0's register until it is done.
+ * Note: the name here is a bit of a misnomer.  ELPG uses this internally...
+ * but ELPG doesn't have to be on to do it manually. */
+static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
+{
+       u32 data;
+       s32 retry = 100; /* poll budget; one is spent per still-pending read */
+
+       gk20a_dbg_fn("");
+
+       /* Make sure all previous writes are committed to the L2. There's no
+          guarantee that the writes have reached DRAM. This will be a
+          sysmembar internal to the L2. */
+       gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+                    ltc_ltcs_ltss_g_elpg_flush_pending_f());
+       do {
+               data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
+
+               if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
+                   ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
+                       gk20a_dbg_info("g_elpg_flush 0x%x", data);
+                       retry--;
+                       usleep_range(20, 40);
+               } else
+                       break; /* flush field no longer reads as pending */
+       } while (retry >= 0 || !tegra_platform_is_silicon());
+
+       if (retry < 0) /* budget spent (off silicon polling went on regardless) */
+               gk20a_warn(dev_from_gk20a(g),
+                           "g_elpg_flush too many retries");
+
+}
+
 void gk20a_init_ltc(struct gpu_ops *gops)
 {
        gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;
index 28c58f5..9840805 100644 (file)
@@ -96,11 +96,11 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
 {
-        return 0x1;
+       return 0x1;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
 {
-        return 0x2;
+       return 0x2;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
 {
@@ -258,6 +258,22 @@ static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
 {
        return 0x1;
 }
+static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
+{
+       return 0x00142214; /* LTC1's g_elpg; ltc1 - ltc0 is the per-LTC stride */
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
+{
+       return (r >> 0) & 0x1; /* flush field: bit 0 of register value r */
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
+{
+       return 0x00000001; /* presumably: flush field value while still pending */
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
+{
+       return 0x1; /* presumably the register-write encoding of flush pending */
+}
 static inline u32 ltc_ltc0_ltss_intr_r(void)
 {
        return 0x0014020c;
index 5da21c6..43c9097 100644 (file)
@@ -193,6 +193,50 @@ void gm20b_ltc_isr(struct gk20a *g)
        gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+/* gm20b version of the ELPG flush: the flush is started with one write to
+ * the broadcast g_elpg register, but completion must be read back from each
+ * LTC's own register. An LTC that has reported completion is not read again,
+ * so the LTCs are polled in turn against one shared retry budget.
+ */
+static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
+{
+       u32 data;
+       u32 ltc = 0;
+       s32 retry = 100;
+       /* distance between consecutive per-LTC g_elpg registers */
+       u32 ltc_stride = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();
+
+       gk20a_dbg_fn("");
+
+       /* start the flush on all LTCs at once */
+       gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+                    ltc_ltcs_ltss_g_elpg_flush_pending_f());
+
+       while (ltc < g->ltc_count) {
+               data = gk20a_readl(g,
+                               ltc_ltc0_ltss_g_elpg_r() + ltc_stride * ltc);
+
+               if (ltc_ltc0_ltss_g_elpg_flush_v(data) !=
+                   ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
+                       /* this LTC is done; check the next one right away */
+                       ltc++;
+                       continue;
+               }
+
+               gk20a_dbg_info("g_elpg_flush 0x%x", data);
+               retry--;
+               usleep_range(20, 40);
+
+               /* the retry limit is only enforced on silicon */
+               if (retry < 0 && tegra_platform_is_silicon())
+                       break;
+       }
+
+       if (retry < 0)
+               gk20a_warn(dev_from_gk20a(g),
+                           "g_elpg_flush too many retries");
+}
+
 void gm20b_init_ltc(struct gpu_ops *gops)
 {
        /* Gk20a reused ops. */
@@ -209,6 +253,6 @@ void gm20b_init_ltc(struct gpu_ops *gops)
        gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
        gops->ltc.init_comptags = gm20b_ltc_init_comptags;
        gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
-       gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked;
+       gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
        gops->ltc.isr = gm20b_ltc_isr;
 }