gpu: nvgpu: Implement NVGPU_GPU_IOCTL_GET_GPU_TIME
Sami Kiminki [Tue, 12 Apr 2016 19:33:36 +0000 (22:33 +0300)]
Implement NVGPU_GPU_IOCTL_GET_GPU_TIME for reading the GPU time.

Bug 1395833

Change-Id: I7ddc7c28ff0c9a336cc0dcd820b15fb0fea714d0
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1125630
(cherry picked from commit 6b35cb05b7822174bf037da7229154004df4f229)
Reviewed-on: http://git-master/r/1317214
(cherry picked from commit cf731c89ab525c59dad38a346649999517e8ecea)
Reviewed-on: http://git-master/r/1325192
GVS: Gerrit_Virtual_Submit
Reviewed-by: Donghan Ryu <dryu@nvidia.com>
(cherry picked from commit f118e3efb7aa3ff107b00540bbd55a032cd1ddf3)
Reviewed-on: http://git-master/r/1461691
Tested-by: Daniel Koch <dkoch@nvidia.com>
Reviewed-by: Daniel Koch <dkoch@nvidia.com>

drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
drivers/gpu/nvgpu/gk20a/hw_timer_gk20a.h
drivers/gpu/nvgpu/gm20b/hw_timer_gm20b.h
include/uapi/linux/nvgpu.h

index 7b05a27..6bd9a61 100644 (file)
@@ -29,6 +29,7 @@
 #include "hw_gr_gk20a.h"
 #include "hw_fb_gk20a.h"
 #include "hw_proj_gk20a.h"
+#include "hw_timer_gk20a.h"
 
 
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
@@ -514,6 +515,58 @@ static int gk20a_ctrl_vsm_mapping(struct gk20a *g,
        return err;
 }
 
+static int nvgpu_gpu_get_gpu_time(
+       struct gk20a *g,
+       struct nvgpu_gpu_get_gpu_time_args *args)
+{      /*
+        * Read the 64-bit GPU timer and return it in args->gpu_timestamp.
+        *
+        * g:    GPU whose timer registers are read via gk20a_readl().
+        * args: ioctl argument block; only gpu_timestamp is written
+        *       here, and only on success.
+        *
+        * Returns 0 on success, the error from gk20a_busy() if that
+        * call fails, or -EBUSY if no consistent high/low pair was
+        * obtained within max_iterations attempts.
+        */
+       int err = 0;
+       const unsigned int max_iterations = 3; /* bound on low/high re-reads */
+       unsigned int i = 0;
+       u32 gpu_timestamp_hi_prev = 0;
+
+       err = gk20a_busy(g->dev);
+       if (err)
+               return err; /* busy failed: skip the gk20a_idle() below */
+
+       /* Note. The GPU nanosecond timer consists of two 32-bit
+        * registers (high & low). To detect a possible low register
+        * wrap-around between the reads, we need to read the high
+        * register before and after low. The wraparound happens
+        * approximately once per 4 secs (2^32 ns is about 4.29 s).
+        * If both high reads return the same value, the low value
+        * read in between is combined with it as the result. */
+
+       /* get initial gpu_timestamp_hi value */
+       gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r());
+
+       for (i = 0; i < max_iterations; ++i) {
+               u32 gpu_timestamp_hi = 0;
+               u32 gpu_timestamp_lo = 0;
+
+               rmb(); /* maintain read order: previous high, then low */
+               gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
+               rmb(); /* maintain read order: low, then high again */
+               gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r());
+
+               if (gpu_timestamp_hi == gpu_timestamp_hi_prev) {
+                       /* high unchanged around the low read: use the pair */
+                       args->gpu_timestamp =
+                               (((u64)gpu_timestamp_hi) << 32) | /* widen first */
+                               gpu_timestamp_lo;
+                       goto clean_up; /* err is still 0 here */
+               }
+
+               /* high changed between the two reads (low wrapped), so
+                * retry using the newest high value as the reference */
+               gpu_timestamp_hi_prev = gpu_timestamp_hi;
+       }
+
+       /* too many iterations, bail out; args->gpu_timestamp is left
+        * unmodified on this path */
+       gk20a_err(dev_from_gk20a(g),
+                 "Failed to read GPU time. Clock or bus unstable?\n");
+       err = -EBUSY;
+
+clean_up:
+       gk20a_idle(g->dev); /* reached only after gk20a_busy() succeeded */
+       return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
        struct platform_device *dev = filp->private_data;
@@ -737,6 +790,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
                        (struct nvgpu_gpu_vsms_mapping *)buf);
                break;
 
+       case NVGPU_GPU_IOCTL_GET_GPU_TIME:
+               err = nvgpu_gpu_get_gpu_time(g,
+                       (struct nvgpu_gpu_get_gpu_time_args *)buf);
+               break;
 
        default:
                dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
index 22bc50a..90bf4f0 100644 (file)
@@ -98,4 +98,12 @@ static inline u32 timer_pri_timeout_fecs_errcode_r(void)
 {
        return 0x0000908c;
 }
+static inline u32 timer_time_0_r(void)
+{
+        return 0x00009400; /* offset read as the low timestamp word in ctrl_gk20a.c */
+}
+static inline u32 timer_time_1_r(void)
+{
+        return 0x00009410; /* offset read as the high timestamp word in ctrl_gk20a.c */
+}
 #endif
index 126f7c8..3f5d679 100644 (file)
@@ -98,4 +98,12 @@ static inline u32 timer_pri_timeout_fecs_errcode_r(void)
 {
        return 0x0000908c;
 }
+static inline u32 timer_time_0_r(void)
+{
+        return 0x00009400; /* same offset as timer_time_0_r() in hw_timer_gk20a.h */
+}
+static inline u32 timer_time_1_r(void)
+{
+        return 0x00009410; /* same offset as timer_time_1_r() in hw_timer_gk20a.h */
+}
 #endif
index e4dcbfc..2f65b76 100644 (file)
@@ -329,6 +329,14 @@ struct nvgpu_gpu_vsms_mapping {
        __u64 vsms_map_buf_addr;
 };
 
+struct nvgpu_gpu_get_gpu_time_args { /* argument of NVGPU_GPU_IOCTL_GET_GPU_TIME */
+       /* raw GPU counter (PTIMER) value; the driver fills it as
+        * (high timer register << 32) | low timer register */
+       __u64 gpu_timestamp;
+
+       /* reserved for future extensions; not written by the driver's
+        * nvgpu_gpu_get_gpu_time() handler */
+       __u64 reserved;
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
        _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -367,9 +375,11 @@ struct nvgpu_gpu_vsms_mapping {
        _IOWR(NVGPU_GPU_IOCTL_MAGIC, 18, struct nvgpu_gpu_num_vsms)
 #define NVGPU_GPU_IOCTL_VSMS_MAPPING \
        _IOWR(NVGPU_GPU_IOCTL_MAGIC, 19, struct nvgpu_gpu_vsms_mapping)
-
+#define NVGPU_GPU_IOCTL_GET_GPU_TIME \
+       _IOWR(NVGPU_GPU_IOCTL_MAGIC, 25, \
+                       struct nvgpu_gpu_get_gpu_time_args) /* NOTE(review): nr jumps 19 -> 25 here; confirm 20-24 are reserved */
 #define NVGPU_GPU_IOCTL_LAST           \
-       _IOC_NR(NVGPU_GPU_IOCTL_VSMS_MAPPING)
+       _IOC_NR(NVGPU_GPU_IOCTL_GET_GPU_TIME)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE   \
        sizeof(struct nvgpu_gpu_prepare_compressible_read_args)