video: tegra: host: Enhance FIFO/GATHER debug_dump

Enhance nvhost_debug_dump() output, as follows:

- Swap the FIFO and GATHER dumps (FIFO is now dumped first) so that even
  if the GATHER dump blows out the seq_printf 1k buffer, we still have
  the FIFO information;
- Write a FIFO signature pattern (0xd???d???) to the indirect save input
  data to help pinpoint the FIFO position within debug dumps;
- Prevent long data sequences from blowing out the seq_printf 1k
  buffer, by limiting such sequences to 64 words.

Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/62424
(cherry picked from commit cb37e4212b78546411b33b32044f30feb0579b86)
Change-Id: Ia2695c502fa0c7b755ef2ae51260650c7d67bf86
Reviewed-on: http://git-master/r/64061
Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
diff --git a/drivers/video/tegra/host/debug.c b/drivers/video/tegra/host/debug.c
index 7003bce..8453f68 100644
--- a/drivers/video/tegra/host/debug.c
+++ b/drivers/video/tegra/host/debug.c
@@ -52,8 +52,8 @@
 		mutex_lock(&ch->reflock);
 		if (ch->refcount) {
 			mutex_lock(&ch->cdma.lock);
-			m->op.debug.show_channel_cdma(m, o, i);
 			m->op.debug.show_channel_fifo(m, o, i);
+			m->op.debug.show_channel_cdma(m, o, i);
 			mutex_unlock(&ch->cdma.lock);
 		}
 		mutex_unlock(&ch->reflock);
@@ -73,7 +73,6 @@
 		nvhost_debug_output(o, "id %d (%s) min %d max %d\n",
 				    i, m->op.syncpt.name(&m->syncpt, i),
 			nvhost_syncpt_update_min(&m->syncpt, i), max);
-
 	}
 
 	for (i = 0; i < m->syncpt.nb_bases; i++) {
diff --git a/drivers/video/tegra/host/nvhost_cdma.h b/drivers/video/tegra/host/nvhost_cdma.h
index e0a034d..ae87d13 100644
--- a/drivers/video/tegra/host/nvhost_cdma.h
+++ b/drivers/video/tegra/host/nvhost_cdma.h
@@ -135,6 +135,7 @@
 int	nvhost_cdma_begin(struct nvhost_cdma *cdma,
 		struct nvhost_userctx_timeout *timeout);
 void	nvhost_cdma_push(struct nvhost_cdma *cdma, u32 op1, u32 op2);
+#define NVHOST_CDMA_PUSH_GATHER_CTXSAVE 0xffffffff
 void	nvhost_cdma_push_gather(struct nvhost_cdma *cdma,
 		struct nvmap_client *client,
 		struct nvmap_handle *handle, u32 op1, u32 op2);
diff --git a/drivers/video/tegra/host/nvhost_syncpt.c b/drivers/video/tegra/host/nvhost_syncpt.c
index 5207c0a..51d79f0 100644
--- a/drivers/video/tegra/host/nvhost_syncpt.c
+++ b/drivers/video/tegra/host/nvhost_syncpt.c
@@ -214,7 +214,8 @@
 			if (check_count > MAX_STUCK_CHECK_COUNT) {
 				if (low_timeout) {
 					dev_warn(&syncpt_to_dev(sp)->pdev->dev,
-						"is timeout %d too low?\n", low_timeout);
+						"is timeout %d too low?\n",
+						low_timeout);
 				}
 				nvhost_debug_dump(syncpt_to_dev(sp));
 				BUG();
diff --git a/drivers/video/tegra/host/t20/3dctx_t20.c b/drivers/video/tegra/host/t20/3dctx_t20.c
index 7ac1b5e..d0609fb 100644
--- a/drivers/video/tegra/host/t20/3dctx_t20.c
+++ b/drivers/video/tegra/host/t20/3dctx_t20.c
@@ -138,8 +138,8 @@
 			struct nvhost_hwctx *ctx)
 {
 	nvhost_cdma_push_gather(cdma,
-			cdma_to_channel(cdma)->dev->nvmap,
-			nvmap_ref_to_handle(nvhost_3dctx_save_buf),
+			(void *)NVHOST_CDMA_PUSH_GATHER_CTXSAVE,
+			(void *)NVHOST_CDMA_PUSH_GATHER_CTXSAVE,
 			nvhost_opcode_gather(save_size),
 			save_phys);
 }
@@ -260,7 +260,17 @@
 			break;
 		}
 		if (ptr) {
-			memset(ptr, 0, count * 4);
+			/* SAVE cases only: reserve room for incoming data */
+			u32 k = 0;
+			/*
+			 * Create a signature pattern for indirect data (which
+			 * will be overwritten by true incoming data) for
+			 * better deducing where we are in a long command
+			 * sequence, when given only a FIFO snapshot for debug
+			 * purposes.
+			*/
+			for (k = 0; k < count; k++)
+				*(ptr + k) = 0xd000d000 | (offset << 16) | k;
 			ptr += count;
 		}
 		save_count += count;
@@ -332,9 +342,6 @@
 	nvhost_syncpt_cpu_incr(&ctx->channel->dev->syncpt, NVSYNCPT_3D);
 }
 
-
-/*** savers ***/
-
 int __init t20_nvhost_3dctx_handler_init(struct nvhost_hwctx_handler *h)
 {
 	struct nvhost_channel *ch;
diff --git a/drivers/video/tegra/host/t20/debug_t20.c b/drivers/video/tegra/host/t20/debug_t20.c
index 5acad0d..84267c9 100644
--- a/drivers/video/tegra/host/t20/debug_t20.c
+++ b/drivers/video/tegra/host/t20/debug_t20.c
@@ -104,10 +104,6 @@
 	}
 }
 
-/*
- * TODO: This uses ioremap_xxx on memory which is deprecated.
- * Also, it won't work properly with SMMU.
- */
 static void show_channel_gather(struct output *o, u32 addr,
 		phys_addr_t phys_addr,
 		u32 words, struct nvhost_cdma *cdma);
@@ -115,6 +111,8 @@
 static void show_channel_word(struct output *o, int *state, int *count,
 		u32 addr, u32 val, struct nvhost_cdma *cdma)
 {
+	static int start_count, dont_print;
+
 	switch (*state) {
 	case NVHOST_DBG_STATE_CMD:
 		if (addr)
@@ -123,6 +121,8 @@
 			nvhost_debug_output(o, "%08x:", val);
 
 		*state = show_channel_command(o, addr, val, count);
+		dont_print = 0;
+		start_count = *count;
 		if (*state == NVHOST_DBG_STATE_DATA && *count == 0) {
 			*state = NVHOST_DBG_STATE_CMD;
 			nvhost_debug_output(o, "])\n");
@@ -131,7 +131,14 @@
 
 	case NVHOST_DBG_STATE_DATA:
 		(*count)--;
-		nvhost_debug_output(o, "%08x%s", val, *count > 0 ? ", " : "])\n");
+		if (start_count - *count < 64)
+			nvhost_debug_output(o, "%08x%s",
+				val, *count > 0 ? ", " : "])\n");
+		else if (!dont_print && (*count > 0)) {
+			nvhost_debug_output(o, "[truncated; %d more words]\n",
+				*count);
+			dont_print = 1;
+		}
 		if (*count == 0)
 			*state = NVHOST_DBG_STATE_CMD;
 		break;
@@ -139,8 +146,10 @@
 	case NVHOST_DBG_STATE_GATHER:
 		*state = NVHOST_DBG_STATE_CMD;
 		nvhost_debug_output(o, "%08x]):\n", val);
-		if (cdma)
-			show_channel_gather(o, addr, val, *count, cdma);
+		if (cdma) {
+			show_channel_gather(o, addr, val,
+					*count, cdma);
+		}
 		break;
 	}
 }
@@ -158,6 +167,11 @@
 	phys_addr_t pin_addr;
 	int state, count, i;
 
+	if ((u32)nvmap->handle == NVHOST_CDMA_PUSH_GATHER_CTXSAVE) {
+		nvhost_debug_output(o, "[context save]\n");
+		return;
+	}
+
 	if (!nvmap->handle || !nvmap->client
 			|| atomic_read(&nvmap->handle->ref) < 1) {
 		nvhost_debug_output(o, "[already deallocated]\n");
@@ -285,6 +299,10 @@
 		break;
 	}
 
+	nvhost_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+		dmaput, dmaget, dmactrl);
+	nvhost_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
+
 	cdma_peek(cdma, dmaget, -1, pbw);
 	show_channel_pair(o, previous_oppair(cdma, dmaget), pbw[0], pbw[1], &channel->cdma);
 	nvhost_debug_output(o, "\n");
@@ -294,11 +312,17 @@
 				 struct output *o, int chid)
 {
 	u32 val, rd_ptr, wr_ptr, start, end;
+	struct nvhost_channel *channel = m->channels + chid;
 	int state, count;
 
-	val = readl(m->aperture + HOST1X_CHANNEL_FIFOSTAT);
-	if (val & (1 << 10))
+	nvhost_debug_output(o, "%d: fifo:\n", chid);
+
+	val = readl(channel->aperture + HOST1X_CHANNEL_FIFOSTAT);
+	nvhost_debug_output(o, "FIFOSTAT %08x\n", val);
+	if (val & (1 << 10)) {
+		nvhost_debug_output(o, "[empty]\n");
 		return;
+	}
 
 	writel(0x0, m->aperture + HOST1X_SYNC_CFPEEK_CTRL);
 	writel((1 << 31) | (chid << 16),
@@ -313,7 +337,6 @@
 	end = (val >> 16) & 0x1ff;
 
 	state = NVHOST_DBG_STATE_CMD;
-	nvhost_debug_output(o, "%d: fifo:\n", chid);
 
 	do {
 		writel(0x0, m->aperture + HOST1X_SYNC_CFPEEK_CTRL);
diff --git a/drivers/video/tegra/host/t30/3dctx_t30.c b/drivers/video/tegra/host/t30/3dctx_t30.c
index 614d079..1678cf9 100644
--- a/drivers/video/tegra/host/t30/3dctx_t30.c
+++ b/drivers/video/tegra/host/t30/3dctx_t30.c
@@ -85,6 +85,7 @@
 };
 
 static unsigned int restore_set1_offset;
+
 /* the same context save command sequence is used for all contexts. */
 static phys_addr_t save_phys;
 static unsigned int save_size;
@@ -151,8 +152,8 @@
 			ctx->restore_phys);
 	/* gather the save buffer */
 	nvhost_cdma_push_gather(cdma,
-			cdma_to_channel(cdma)->dev->nvmap,
-			nvmap_ref_to_handle(nvhost_3dctx_save_buf),
+			(void *)NVHOST_CDMA_PUSH_GATHER_CTXSAVE,
+			(void *)NVHOST_CDMA_PUSH_GATHER_CTXSAVE,
 			nvhost_opcode_gather(save_size),
 			save_phys);
 }
@@ -272,7 +273,17 @@
 			break;
 		}
 		if (ptr) {
-			memset(ptr, 0, count * 4);
+			/* SAVE cases only: reserve room for incoming data */
+			u32 k = 0;
+			/*
+			 * Create a signature pattern for indirect data (which
+			 * will be overwritten by true incoming data) for
+			 * better deducing where we are in a long command
+			 * sequence, when given only a FIFO snapshot for debug
+			 * purposes.
+			*/
+			for (k = 0; k < count; k++)
+				*(ptr + k) = 0xd000d000 | (offset << 16) | k;
 			ptr += count;
 		}
 		save_count += count;
@@ -402,8 +413,6 @@
 	return nvhost_3dctx_alloc_common(ch, false);
 }
 
-/*** savers ***/
-
 int __init t30_nvhost_3dctx_handler_init(struct nvhost_hwctx_handler *h)
 {
 	struct nvhost_channel *ch;