video: tegra: gpu debugger
Ken Adams [Thu, 27 Feb 2014 21:57:26 +0000 (13:57 -0800)]
Add a control to manage the SMPC context-switch mode.
This is needed in reaction to the recent security change
which disabled set_falcon[5] etc.

Bug 1375360

Change-Id: Ia9d1a97a1b89b48538010d74207ff4d1b8852083
Signed-off-by: Ken Adams <kadams@nvidia.com>
Reviewed-on: http://git-master/r/375772
(cherry picked from commit c3c7d8b60f62bc276d0e773994ea6e0a4d9422cb)
Reviewed-on: http://git-master/r/382743
GVS: Gerrit_Virtual_Submit
Reviewed-by: Matthew Pedro <mapedro@nvidia.com>
Tested-by: Matthew Pedro <mapedro@nvidia.com>

drivers/video/tegra/host/gk20a/dbg_gpu_gk20a.c
drivers/video/tegra/host/gk20a/gr_gk20a.c
drivers/video/tegra/host/gk20a/gr_gk20a.h
drivers/video/tegra/host/gk20a/hw_ctxsw_prog_gk20a.h
include/linux/nvhost_dbg_gpu_ioctl.h

diff --git a/drivers/video/tegra/host/gk20a/dbg_gpu_gk20a.c b/drivers/video/tegra/host/gk20a/dbg_gpu_gk20a.c
index fa52607..24032ed 100644
--- a/drivers/video/tegra/host/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/video/tegra/host/gk20a/dbg_gpu_gk20a.c
@@ -371,6 +371,9 @@ static int nvhost_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
                                struct nvhost_dbg_gpu_powergate_args *args);
 
+static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
+                             struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args);
+
 long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg)
 {
@@ -417,6 +420,11 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
                           (struct nvhost_dbg_gpu_events_ctrl_args *)buf);
                break;
 
+       case NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE:
+               err = nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(dbg_s,
+                          (struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *)buf);
+               break;
+
        default:
                nvhost_err(dev_from_gk20a(g),
                           "unrecognized dbg gpu ioctl cmd: 0x%x",
@@ -616,3 +624,71 @@ static int nvhost_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
        mutex_unlock(&g->dbg_sessions_lock);
        return  err;
 }
+
+static int nvhost_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
+                              struct nvhost_dbg_gpu_smpc_ctxsw_mode_args *args)
+{
+       int err;
+       struct gk20a *g = get_gk20a(dbg_s->pdev);
+       struct channel_gk20a *ch_gk20a;
+
+       nvhost_dbg_fn("%s smpc ctxsw mode = %d",
+                     dev_name(dbg_s->dev), args->mode);
+
+       /* Take the global lock, since we'll be doing global regops */
+       mutex_lock(&g->dbg_sessions_lock);
+
+       ch_gk20a = dbg_s->ch;
+
+       if (!ch_gk20a) {
+               nvhost_err(dev_from_gk20a(dbg_s->g),
+                  "no bound channel for smpc ctxsw mode update\n");
+               err = -EINVAL;
+               goto clean_up;
+       }
+
+       err = gr_gk20a_update_smpc_ctxsw_mode(g, ch_gk20a,
+                     args->mode == NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
+       if (err) {
+               nvhost_err(dev_from_gk20a(dbg_s->g),
+                          "error (%d) during smpc ctxsw mode update\n", err);
+               goto clean_up;
+       }
+       /* The following regops are a hack/WAR (workaround) to compensate
+        * for the fact that we just wrote into the ctxsw image without
+        * really knowing whether it has already been swapped in/out, etc.
+        */
+       {
+               struct nvhost_dbg_gpu_reg_op ops[4];
+               int i;
+               for (i = 0; i < ARRAY_SIZE(ops); i++) {
+                       ops[i].op     = NVHOST_DBG_GPU_REG_OP_WRITE_32;
+                       ops[i].type   = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX;
+                       ops[i].status = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS;
+                       ops[i].value_hi      = 0;
+                       ops[i].and_n_mask_lo = 0;
+                       ops[i].and_n_mask_hi = 0;
+               }
+               /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control_sel1_r();*/
+               ops[0].offset   = 0x00419e08;
+               ops[0].value_lo = 0x1d;
+
+               /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control5_r(); */
+               ops[1].offset   = 0x00419e58;
+               ops[1].value_lo = 0x1;
+
+               /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter_control3_r(); */
+               ops[2].offset   = 0x00419e68;
+               ops[2].value_lo = 0xaaaa;
+
+               /* gr_pri_gpcs_tpcs_sm_dsm_perf_counter4_control_r(); */
+               ops[3].offset   = 0x00419f40;
+               ops[3].value_lo = 0x18;
+
+               err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops));
+       }
+
+ clean_up:
+       mutex_unlock(&g->dbg_sessions_lock);
+       return  err;
+}
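
For reference, the WAR block above can also be written table-driven, keeping
the register offsets and values in one place. A sketch under the same
definitions used in this file (not part of the patch; smpc_war is a
hypothetical name):

    /* Hypothetical table-driven form of the SMPC WAR regops above. */
    static const struct { u32 offset; u32 value; } smpc_war[] = {
            { 0x00419e08, 0x1d   }, /* ..._sm_dsm_perf_counter_control_sel1_r() */
            { 0x00419e58, 0x1    }, /* ..._sm_dsm_perf_counter_control5_r()     */
            { 0x00419e68, 0xaaaa }, /* ..._sm_dsm_perf_counter_control3_r()     */
            { 0x00419f40, 0x18   }, /* ..._sm_dsm_perf_counter4_control_r()     */
    };
    struct nvhost_dbg_gpu_reg_op ops[ARRAY_SIZE(smpc_war)];
    int i;

    memset(ops, 0, sizeof(ops));
    for (i = 0; i < ARRAY_SIZE(smpc_war); i++) {
            ops[i].op       = NVHOST_DBG_GPU_REG_OP_WRITE_32;
            ops[i].type     = NVHOST_DBG_GPU_REG_OP_TYPE_GR_CTX;
            ops[i].status   = NVHOST_DBG_GPU_REG_OP_STATUS_SUCCESS;
            ops[i].offset   = smpc_war[i].offset;
            ops[i].value_lo = smpc_war[i].value;
    }
    err = dbg_s->ops->exec_reg_ops(dbg_s, ops, ARRAY_SIZE(ops));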
diff --git a/drivers/video/tegra/host/gk20a/gr_gk20a.c b/drivers/video/tegra/host/gk20a/gr_gk20a.c
index 15ec816..6b2e928 100644
--- a/drivers/video/tegra/host/gk20a/gr_gk20a.c
+++ b/drivers/video/tegra/host/gk20a/gr_gk20a.c
@@ -1564,6 +1564,42 @@ clean_up:
        return err;
 }
 
+int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
+                                   struct channel_gk20a *c,
+                                   bool enable_smpc_ctxsw)
+{
+       struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+       void *ctx_ptr = NULL;
+       u32 data;
+
+       /* XXX: caller responsible for making sure the channel is quiesced? */
+
+       /* The channel gr_ctx buffer is GPU-cacheable;
+          flush and invalidate it before the CPU update. */
+       gk20a_mm_fb_flush(g);
+       gk20a_mm_l2_flush(g, true);
+
+       ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
+                       PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
+                       0, pgprot_dmacoherent(PAGE_KERNEL));
+       if (!ctx_ptr)
+               return -ENOMEM;
+
+       data = mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+       data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
+       data |= enable_smpc_ctxsw ?
+               ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
+               ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
+       mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
+                data);
+
+       vunmap(ctx_ptr);
+
+       gk20a_mm_l2_invalidate(g);
+
+       return 0;
+}
+
 /* load a saved fresh copy of the golden image into the channel gr_ctx */
 static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
                                        struct channel_gk20a *c)
@@ -1572,7 +1608,7 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
        struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
        u32 virt_addr_lo;
        u32 virt_addr_hi;
-       u32 i, v;
+       u32 i, v, data;
        int ret = 0;
        void *ctx_ptr = NULL;
 
@@ -1610,10 +1646,13 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
        /* no user for client managed performance counter ctx */
        ch_ctx->pm_ctx.ctx_sw_mode =
-               ctxsw_prog_main_image_pm_mode_no_ctxsw_v();
-
+               ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
+       data = mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0);
+       data = data & ~ctxsw_prog_main_image_pm_mode_m();
+       data |= ch_ctx->pm_ctx.ctx_sw_mode;
        mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0,
-               ch_ctx->pm_ctx.ctx_sw_mode);
+                data);
+
        mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, 0);
 
        /* set priv access map */
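
Both hunks above read-modify-write the same pm word at
ctxsw_prog_main_image_pm_o() (byte offset 0x28 in the ctxsw image), which
packs pm_mode in bits 2:0 and the new smpc_mode in bits 5:3. A standalone
worked example (assumed starting value of 0) showing that the two fields
stay independent:

    u32 data = 0x0;  /* assume pm_mode == no_ctxsw, smpc_mode == no_ctxsw */

    /* Enable SMPC ctxsw: clear bits 5:3, then OR in the field value. */
    data &= ~ctxsw_prog_main_image_pm_smpc_mode_m();       /* & ~(0x7 << 3) */
    data |= ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f();  /* | 0x8         */

    /* Now data == 0x8: ctxsw_prog_main_image_pm_smpc_mode_v(data) == 1,
     * while pm_mode (bits 2:0) still reads back as no_ctxsw (0). */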
diff --git a/drivers/video/tegra/host/gk20a/gr_gk20a.h b/drivers/video/tegra/host/gk20a/gr_gk20a.h
index 2d138cf..ea8b87f 100644
--- a/drivers/video/tegra/host/gk20a/gr_gk20a.h
+++ b/drivers/video/tegra/host/gk20a/gr_gk20a.h
@@ -372,5 +372,7 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
                                    u32 *offsets, u32 *offset_addrs,
                                    u32 *num_offsets,
                                    bool is_quad, u32 quad);
-
+int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
+                                    struct channel_gk20a *c,
+                                    bool enable_smpc_ctxsw);
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/video/tegra/host/gk20a/hw_ctxsw_prog_gk20a.h b/drivers/video/tegra/host/gk20a/hw_ctxsw_prog_gk20a.h
index 745cfd4..e2a4f2f 100644
--- a/drivers/video/tegra/host/gk20a/hw_ctxsw_prog_gk20a.h
+++ b/drivers/video/tegra/host/gk20a/hw_ctxsw_prog_gk20a.h
@@ -90,13 +90,33 @@ static inline u32 ctxsw_prog_main_image_pm_o(void)
 {
        return 0x00000028;
 }
+static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
+{
+       return 0x7 << 0;
+}
 static inline u32 ctxsw_prog_main_image_pm_mode_v(u32 r)
 {
        return (r >> 0) & 0x7;
 }
-static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_v(void)
+static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
+{
+       return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
+{
+       return 0x7 << 3;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_v(u32 r)
 {
-       return 0x00000000;
+       return (r >> 3) & 0x7;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(void)
+{
+       return 0x0;
+}
+static inline u32 ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f(void)
+{
+       return 0x8;
 }
 static inline u32 ctxsw_prog_main_image_pm_ptr_o(void)
 {
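
The generated accessors follow the usual gk20a hw-header naming convention:
_o() returns a byte offset, _m() a field mask, _f() a field value already
shifted into position, and _v() extracts a field from a read value. For the
new smpc_mode field (bits 5:3), for instance:

    /* Sketch: relationships among the smpc_mode helpers added above. */
    u32 m = ctxsw_prog_main_image_pm_smpc_mode_m();        /* 0x7 << 3 == 0x38 */
    u32 f = ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f();  /* 0x8 == 1 << 3    */
    u32 v = ctxsw_prog_main_image_pm_smpc_mode_v(f);       /* 1, i.e. ctxsw    */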
diff --git a/include/linux/nvhost_dbg_gpu_ioctl.h b/include/linux/nvhost_dbg_gpu_ioctl.h
index 1791b66..7ba43a0 100644
--- a/include/linux/nvhost_dbg_gpu_ioctl.h
+++ b/include/linux/nvhost_dbg_gpu_ioctl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -137,8 +137,21 @@ struct nvhost_dbg_gpu_powergate_args {
 #define NVHOST_DBG_GPU_IOCTL_POWERGATE                                 \
        _IOWR(NVHOST_DBG_GPU_IOCTL_MAGIC, 4, struct nvhost_dbg_gpu_powergate_args)
 
+
+/* SMPC Context Switch Mode */
+#define NVHOST_DBG_GPU_SMPC_CTXSW_MODE_NO_CTXSW               (0x00000000)
+#define NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW                  (0x00000001)
+
+struct nvhost_dbg_gpu_smpc_ctxsw_mode_args {
+       __u32 mode;
+} __packed;
+
+#define NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE \
+       _IOWR(NVHOST_DBG_GPU_IOCTL_MAGIC, 5, struct nvhost_dbg_gpu_smpc_ctxsw_mode_args)
+
+
 #define NVHOST_DBG_GPU_IOCTL_LAST              \
-       _IOC_NR(NVHOST_DBG_GPU_IOCTL_POWERGATE)
+       _IOC_NR(NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE)
 #define NVHOST_DBG_GPU_IOCTL_MAX_ARG_SIZE              \
        sizeof(struct nvhost_dbg_gpu_exec_reg_ops_args)
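
From user space, a debugger would drive the new control roughly as follows.
This is a minimal sketch, not part of the patch: it assumes dbg_fd is an
open dbg-gpu session file descriptor with a channel already bound, since
the handler returns -EINVAL when no channel is bound to the session.

    #include <sys/ioctl.h>
    #include <linux/nvhost_dbg_gpu_ioctl.h>

    /* Enable or disable SMPC state save/restore for the bound channel. */
    static int set_smpc_ctxsw(int dbg_fd, int enable)
    {
            struct nvhost_dbg_gpu_smpc_ctxsw_mode_args args = {
                    .mode = enable ? NVHOST_DBG_GPU_SMPC_CTXSW_MODE_CTXSW
                                   : NVHOST_DBG_GPU_SMPC_CTXSW_MODE_NO_CTXSW,
            };
            return ioctl(dbg_fd, NVHOST_DBG_GPU_IOCTL_SMPC_CTXSW_MODE, &args);
    }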