[ARM/tegra] nvhost: Tegra3 support
Andrew Howe [Fri, 10 Dec 2010 14:18:33 +0000 (16:18 +0200)]
Adding support for Tegra3:
* auto context save (without FIFO reading in interrupt)
* new registers
* SLI

Note: support is currently hardcoded to Tegra3 with SLIx2; a runtime query function for chip version and GPU count is still needed.

Original-Change-Id: I7daff768540ac0f0af12a655a664428a3ae55665
Reviewed-on: http://git-master/r/12564
Tested-by: Andrew Howe <ahowe@nvidia.com>
Tested-by: Jussi Rasanen <jrasanen@nvidia.com>
Reviewed-by: Andrew Howe <ahowe@nvidia.com>
Reviewed-by: Scott Williams <scwilliams@nvidia.com>
Original-Change-Id: I42bfa94856676bfd82b4c11cc8cf523ca2c0dbe6

Rebase-Id: R9b1f81b7a0323f87a4c13804e483383595971550

drivers/video/tegra/host/dev.c
drivers/video/tegra/host/nvhost_3dctx.c
drivers/video/tegra/host/nvhost_channel.c
drivers/video/tegra/host/nvhost_channel.h
drivers/video/tegra/host/nvhost_hardware.h
drivers/video/tegra/host/nvhost_hwctx.h
drivers/video/tegra/host/nvhost_intr.c

index eeffcef..2385364 100644 (file)
@@ -52,8 +52,8 @@ struct nvhost_channel_userctx {
        u32 relocs_pending;
        u32 null_kickoff;
        struct nvmap_handle_ref *gather_mem;
-       struct nvhost_op_pair *gathers;
-       int num_gathers;
+       u32 *gathers;
+       u32 *cur_gather;
        int pinarray_size;
        struct nvmap_pinarray_elem pinarray[NVHOST_MAX_HANDLES];
        struct nvmap_handle *unpinarray[NVHOST_MAX_HANDLES];
@@ -91,7 +91,6 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp)
 {
        struct nvhost_channel_userctx *priv;
        struct nvhost_channel *ch;
-       size_t gather_size;
 
        ch = container_of(inode->i_cdev, struct nvhost_channel, cdev);
        ch = nvhost_getchannel(ch);
@@ -105,9 +104,9 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp)
        }
        filp->private_data = priv;
        priv->ch = ch;
-       gather_size = sizeof(struct nvhost_op_pair) * NVHOST_MAX_GATHERS;
-       priv->gather_mem = nvmap_alloc(ch->dev->nvmap, gather_size, 32,
-                                      NVMAP_HANDLE_CACHEABLE);
+       priv->gather_mem = nvmap_alloc(ch->dev->nvmap,
+                               sizeof(u32) * 2 * NVHOST_MAX_GATHERS, 32,
+                               NVMAP_HANDLE_CACHEABLE);
        if (IS_ERR(priv->gather_mem))
                goto fail;
 
@@ -117,7 +116,7 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp)
                        goto fail;
        }
 
-       priv->gathers = (struct nvhost_op_pair *)nvmap_mmap(priv->gather_mem);
+       priv->gathers = nvmap_mmap(priv->gather_mem);
 
        return 0;
 fail:
@@ -125,17 +124,18 @@ fail:
        return -ENOMEM;
 }
 
-static void add_gather(struct nvhost_channel_userctx *ctx, int idx,
-                      u32 mem_id, u32 words, u32 offset)
+static void add_gather(struct nvhost_channel_userctx *ctx,
+               u32 mem_id, u32 words, u32 offset)
 {
        struct nvmap_pinarray_elem *pin;
+       u32* cur_gather = ctx->cur_gather;
        pin = &ctx->pinarray[ctx->pinarray_size++];
        pin->patch_mem = (u32)nvmap_ref_to_handle(ctx->gather_mem);
-       pin->patch_offset = (idx * sizeof(struct nvhost_op_pair)) +
-               offsetof(struct nvhost_op_pair, op2);
+       pin->patch_offset = ((cur_gather + 1) - ctx->gathers) * sizeof(u32);
        pin->pin_mem = mem_id;
        pin->pin_offset = offset;
-       ctx->gathers[idx].op1 = nvhost_opcode_gather(0, words);
+       cur_gather[0] = words;
+       ctx->cur_gather = cur_gather + 2;
 }
 
 static void reset_submit(struct nvhost_channel_userctx *ctx)
@@ -165,8 +165,7 @@ static ssize_t nvhost_channelwrite(struct file *filp, const char __user *buf,
                                err = -EFAULT;
                                break;
                        }
-                       /* leave room for ctx switch */
-                       priv->num_gathers = 2;
+                       priv->cur_gather = priv->gathers;
                        priv->pinarray_size = 0;
                } else if (priv->cmdbufs_pending) {
                        struct nvhost_cmdbuf cmdbuf;
@@ -177,8 +176,8 @@ static ssize_t nvhost_channelwrite(struct file *filp, const char __user *buf,
                                err = -EFAULT;
                                break;
                        }
-                       add_gather(priv, priv->num_gathers++,
-                                  cmdbuf.mem, cmdbuf.words, cmdbuf.offset);
+                       add_gather(priv,
+                               cmdbuf.mem, cmdbuf.words, cmdbuf.offset);
                        priv->cmdbufs_pending--;
                } else if (priv->relocs_pending) {
                        int numrelocs = remaining / sizeof(struct nvhost_reloc);
@@ -214,106 +213,42 @@ static int nvhost_ioctl_channel_flush(struct nvhost_channel_userctx *ctx,
                                       struct nvhost_get_param_args *args,
                                       int null_kickoff)
 {
-       struct nvhost_cpuinterrupt ctxsw;
-       int gather_idx = 2;
-       int num_intrs = 0;
-       u32 syncval;
+       struct device *device = &ctx->ch->dev->pdev->dev;
        int num_unpin;
        int err;
-       int nulled_incrs = null_kickoff ? ctx->syncpt_incrs : 0;
 
        if (ctx->relocs_pending || ctx->cmdbufs_pending) {
                reset_submit(ctx);
-               dev_err(&ctx->ch->dev->pdev->dev, "channel submit out of sync\n");
+               dev_err(device, "channel submit out of sync\n");
                return -EFAULT;
        }
        if (!ctx->nvmap) {
-               dev_err(&ctx->ch->dev->pdev->dev, "no nvmap context set\n");
+               dev_err(device, "no nvmap context set\n");
                return -EFAULT;
        }
-       if (ctx->num_gathers <= 2)
+       if (ctx->cur_gather == ctx->gathers)
                return 0;
 
-       /* keep module powered */
-       nvhost_module_busy(&ctx->ch->mod);
-
        /* pin mem handles and patch physical addresses */
        num_unpin = nvmap_pin_array(ctx->nvmap,
                                    nvmap_ref_to_handle(ctx->gather_mem),
                                    ctx->pinarray, ctx->pinarray_size,
                                    ctx->unpinarray);
        if (num_unpin < 0) {
-               dev_warn(&ctx->ch->dev->pdev->dev, "nvmap_pin_array failed: "
-                        "%d\n", num_unpin);
-               nvhost_module_idle(&ctx->ch->mod);
+               dev_warn(device, "nvmap_pin_array failed: %d\n", num_unpin);
                return num_unpin;
        }
 
-       /* get submit lock */
-       err = mutex_lock_interruptible(&ctx->ch->submitlock);
-       if (err) {
+       /* context switch if needed, and submit user's gathers to the channel */
+       err = nvhost_channel_submit(ctx->ch, ctx->hwctx, ctx->nvmap,
+                               ctx->gathers, ctx->cur_gather,
+                               ctx->unpinarray, num_unpin,
+                               ctx->syncpt_id, ctx->syncpt_incrs,
+                               &args->value,
+                               ctx->null_kickoff != 0);
+       if (err)
                nvmap_unpin_handles(ctx->nvmap, ctx->unpinarray, num_unpin);
-               nvhost_module_idle(&ctx->ch->mod);
-               return err;
-       }
-
-       /* context switch */
-       if (ctx->ch->cur_ctx != ctx->hwctx) {
-               struct nvhost_hwctx *hw = ctx->hwctx;
-               if (hw && hw->valid) {
-                       gather_idx--;
-                       ctx->gathers[gather_idx].op1 =
-                               nvhost_opcode_gather(0, hw->restore_size);
-                       ctx->gathers[gather_idx].op2 = hw->restore_phys;
-                       ctx->syncpt_incrs += hw->restore_incrs;
-               }
-               hw = ctx->ch->cur_ctx;
-               if (hw) {
-                       gather_idx--;
-                       ctx->gathers[gather_idx].op1 =
-                               nvhost_opcode_gather(0, hw->save_size);
-                       ctx->gathers[gather_idx].op2 = hw->save_phys;
-                       ctx->syncpt_incrs += hw->save_incrs;
-                       num_intrs = 1;
-                       ctxsw.syncpt_val = hw->save_incrs - 1;
-                       ctxsw.intr_data = hw;
-                       hw->valid = true;
-                       ctx->ch->ctxhandler.get(hw);
-               }
-               ctx->ch->cur_ctx = ctx->hwctx;
-       }
-
-       /* add a setclass for modules that require it */
-       if (gather_idx == 2 && ctx->ch->desc->class) {
-               gather_idx--;
-               ctx->gathers[gather_idx].op1 =
-                       nvhost_opcode_setclass(ctx->ch->desc->class, 0, 0);
-               ctx->gathers[gather_idx].op2 = NVHOST_OPCODE_NOOP;
-       }
-
-       /* get absolute sync value */
-       if (BIT(ctx->syncpt_id) & NVSYNCPTS_CLIENT_MANAGED)
-               syncval = nvhost_syncpt_set_max(&ctx->ch->dev->syncpt,
-                                               ctx->syncpt_id, ctx->syncpt_incrs);
-       else
-               syncval = nvhost_syncpt_incr_max(&ctx->ch->dev->syncpt,
-                                               ctx->syncpt_id, ctx->syncpt_incrs);
-
-       /* patch absolute syncpt value into interrupt triggers */
-       ctxsw.syncpt_val += syncval - ctx->syncpt_incrs;
-
-       nvhost_channel_submit(ctx->ch, ctx->nvmap, &ctx->gathers[gather_idx],
-                             (null_kickoff ? 2 : ctx->num_gathers) - gather_idx, &ctxsw, num_intrs,
-                             ctx->unpinarray, num_unpin,
-                             ctx->syncpt_id, syncval,
-                             nulled_incrs);
-
-       /* schedule a submit complete interrupt */
-       nvhost_intr_add_action(&ctx->ch->dev->intr, ctx->syncpt_id, syncval,
-                       NVHOST_INTR_ACTION_SUBMIT_COMPLETE, ctx->ch, NULL);
 
-       mutex_unlock(&ctx->ch->submitlock);
-       args->value = syncval;
        return 0;
 }
 
index 1840d47..0bc0699 100644 (file)
 
 #include <linux/slab.h>
 
-const struct hwctx_reginfo ctxsave_regs_3d[] = {
-       HWCTX_REGINFO(0xe00, 16, DIRECT),
-       HWCTX_REGINFO(0xe10, 16, DIRECT),
-       HWCTX_REGINFO(0xe20, 1, DIRECT),
-       HWCTX_REGINFO(0xe21, 1, DIRECT),
-       HWCTX_REGINFO(0xe22, 1, DIRECT),
-       HWCTX_REGINFO(0xe25, 1, DIRECT),
-       HWCTX_REGINFO(0xe26, 1, DIRECT),
-       HWCTX_REGINFO(0xe28, 2, DIRECT),
-       HWCTX_REGINFO(0xe2a, 1, DIRECT),
-       HWCTX_REGINFO(0x1, 1, DIRECT),
-       HWCTX_REGINFO(0x2, 1, DIRECT),
-       HWCTX_REGINFO(0xc, 2, DIRECT),
-       HWCTX_REGINFO(0xe, 2, DIRECT),
-       HWCTX_REGINFO(0x10, 2, DIRECT),
-       HWCTX_REGINFO(0x12, 2, DIRECT),
-       HWCTX_REGINFO(0x14, 2, DIRECT),
-       HWCTX_REGINFO(0x100, 32, DIRECT),
-       HWCTX_REGINFO(0x120, 1, DIRECT),
-       HWCTX_REGINFO(0x121, 1, DIRECT),
-       HWCTX_REGINFO(0x124, 1, DIRECT),
-       HWCTX_REGINFO(0x125, 1, DIRECT),
-       HWCTX_REGINFO(0x200, 1, DIRECT),
-       HWCTX_REGINFO(0x201, 1, DIRECT),
-       HWCTX_REGINFO(0x202, 1, DIRECT),
-       HWCTX_REGINFO(0x203, 1, DIRECT),
-       HWCTX_REGINFO(0x204, 1, DIRECT),
-       HWCTX_REGINFO(0x207, 1024, INDIRECT),
-       HWCTX_REGINFO(0x209, 1, DIRECT),
-       HWCTX_REGINFO(0x300, 64, DIRECT),
-       HWCTX_REGINFO(0x343, 1, DIRECT),
-       HWCTX_REGINFO(0x344, 1, DIRECT),
-       HWCTX_REGINFO(0x345, 1, DIRECT),
-       HWCTX_REGINFO(0x346, 1, DIRECT),
-       HWCTX_REGINFO(0x347, 1, DIRECT),
-       HWCTX_REGINFO(0x348, 1, DIRECT),
-       HWCTX_REGINFO(0x349, 1, DIRECT),
-       HWCTX_REGINFO(0x34a, 1, DIRECT),
-       HWCTX_REGINFO(0x34b, 1, DIRECT),
-       HWCTX_REGINFO(0x34c, 1, DIRECT),
-       HWCTX_REGINFO(0x34d, 1, DIRECT),
-       HWCTX_REGINFO(0x34e, 1, DIRECT),
-       HWCTX_REGINFO(0x34f, 1, DIRECT),
-       HWCTX_REGINFO(0x350, 1, DIRECT),
-       HWCTX_REGINFO(0x351, 1, DIRECT),
-       HWCTX_REGINFO(0x352, 1, DIRECT),
-       HWCTX_REGINFO(0x353, 1, DIRECT),
-       HWCTX_REGINFO(0x354, 1, DIRECT),
-       HWCTX_REGINFO(0x355, 1, DIRECT),
-       HWCTX_REGINFO(0x356, 1, DIRECT),
-       HWCTX_REGINFO(0x357, 1, DIRECT),
-       HWCTX_REGINFO(0x358, 1, DIRECT),
-       HWCTX_REGINFO(0x359, 1, DIRECT),
-       HWCTX_REGINFO(0x35a, 1, DIRECT),
-       HWCTX_REGINFO(0x35b, 1, DIRECT),
-       HWCTX_REGINFO(0x363, 1, DIRECT),
-       HWCTX_REGINFO(0x364, 1, DIRECT),
-       HWCTX_REGINFO(0x400, 2, DIRECT),
-       HWCTX_REGINFO(0x402, 1, DIRECT),
-       HWCTX_REGINFO(0x403, 1, DIRECT),
-       HWCTX_REGINFO(0x404, 1, DIRECT),
-       HWCTX_REGINFO(0x405, 1, DIRECT),
-       HWCTX_REGINFO(0x406, 1, DIRECT),
-       HWCTX_REGINFO(0x407, 1, DIRECT),
-       HWCTX_REGINFO(0x408, 1, DIRECT),
-       HWCTX_REGINFO(0x409, 1, DIRECT),
-       HWCTX_REGINFO(0x40a, 1, DIRECT),
-       HWCTX_REGINFO(0x40b, 1, DIRECT),
-       HWCTX_REGINFO(0x40c, 1, DIRECT),
-       HWCTX_REGINFO(0x40d, 1, DIRECT),
-       HWCTX_REGINFO(0x40e, 1, DIRECT),
-       HWCTX_REGINFO(0x40f, 1, DIRECT),
-       HWCTX_REGINFO(0x411, 1, DIRECT),
-       HWCTX_REGINFO(0x500, 1, DIRECT),
-       HWCTX_REGINFO(0x501, 1, DIRECT),
-       HWCTX_REGINFO(0x502, 1, DIRECT),
-       HWCTX_REGINFO(0x503, 1, DIRECT),
-       HWCTX_REGINFO(0x520, 32, DIRECT),
-       HWCTX_REGINFO(0x540, 64, INDIRECT),
-       HWCTX_REGINFO(0x600, 0, INDIRECT_OFFSET),
-       HWCTX_REGINFO(0x602, 16, INDIRECT_DATA),
-       HWCTX_REGINFO(0x603, 128, INDIRECT),
-       HWCTX_REGINFO(0x608, 4, DIRECT),
-       HWCTX_REGINFO(0x60e, 1, DIRECT),
-       HWCTX_REGINFO(0x700, 64, INDIRECT),
-       HWCTX_REGINFO(0x710, 16, DIRECT),
-       HWCTX_REGINFO(0x720, 32, DIRECT),
-       HWCTX_REGINFO(0x740, 1, DIRECT),
-       HWCTX_REGINFO(0x741, 1, DIRECT),
-       HWCTX_REGINFO(0x800, 0, INDIRECT_OFFSET),
-       HWCTX_REGINFO(0x802, 16, INDIRECT_DATA),
-       HWCTX_REGINFO(0x803, 512, INDIRECT),
-       HWCTX_REGINFO(0x805, 64, INDIRECT),
-       HWCTX_REGINFO(0x820, 32, DIRECT),
-       HWCTX_REGINFO(0x900, 64, INDIRECT),
-       HWCTX_REGINFO(0x902, 1, DIRECT),
-       HWCTX_REGINFO(0x903, 1, DIRECT),
-       HWCTX_REGINFO(0xa02, 1, DIRECT),
-       HWCTX_REGINFO(0xa03, 1, DIRECT),
-       HWCTX_REGINFO(0xa04, 1, DIRECT),
-       HWCTX_REGINFO(0xa05, 1, DIRECT),
-       HWCTX_REGINFO(0xa06, 1, DIRECT),
-       HWCTX_REGINFO(0xa07, 1, DIRECT),
-       HWCTX_REGINFO(0xa08, 1, DIRECT),
-       HWCTX_REGINFO(0xa09, 1, DIRECT),
-       HWCTX_REGINFO(0xa0a, 1, DIRECT),
-       HWCTX_REGINFO(0xa0b, 1, DIRECT),
-       HWCTX_REGINFO(0x205, 1024, INDIRECT)
+/* Hardcoded chip/SLI config until a runtime query exists (see commit msg).
+ * CONFIG_ARCH_TEGRA_3x_SOC is a Kconfig bool: defined as 1 when enabled,
+ * undefined otherwise — so it must be tested with #ifdef, not #if
+ * (#if on the undefined symbol only works by accident and warns
+ * under -Wundef). */
+#ifdef CONFIG_ARCH_TEGRA_3x_SOC
+static bool s_is_v1 = true;   /* Tegra3: v1 context-save path */
+static int s_nr_gpus = 2;     /* SLIx2 */
+#else
+static bool s_is_v1 = false;  /* Tegra2: legacy v0 path */
+static int s_nr_gpus = 1;
+#endif
+
+const struct hwctx_reginfo ctxsave_regs_3d_global[] = {
+       HWCTX_REGINFO(0, 0xe00,    4, DIRECT),
+       HWCTX_REGINFO(0, 0xe05,   30, DIRECT),
+       HWCTX_REGINFO(0, 0xe25,    2, DIRECT),
+       HWCTX_REGINFO(0, 0xe28,    2, DIRECT),
+       HWCTX_REGINFO(1, 0xe30,   16, DIRECT),
+       HWCTX_REGINFO(0, 0x001,    2, DIRECT),
+       HWCTX_REGINFO(0, 0x00c,   10, DIRECT),
+       HWCTX_REGINFO(0, 0x100,   34, DIRECT),
+       HWCTX_REGINFO(0, 0x124,    2, DIRECT),
+       HWCTX_REGINFO(0, 0x200,    5, DIRECT),
+       HWCTX_REGINFO(0, 0x205, 1024, INDIRECT),
+       HWCTX_REGINFO(0, 0x207, 1024, INDIRECT),
+       HWCTX_REGINFO(0, 0x209,    1, DIRECT),
+       HWCTX_REGINFO(0, 0x300,   64, DIRECT),
+       HWCTX_REGINFO(0, 0x343,   25, DIRECT),
+       HWCTX_REGINFO(0, 0x363,    2, DIRECT),
+       HWCTX_REGINFO(0, 0x400,   16, DIRECT),
+       HWCTX_REGINFO(0, 0x411,    1, DIRECT),
+       HWCTX_REGINFO(1, 0x412,    1, DIRECT),
+       HWCTX_REGINFO(0, 0x500,    4, DIRECT),
+       HWCTX_REGINFO(0, 0x520,   32, DIRECT),
+       HWCTX_REGINFO(0, 0x540,   64, INDIRECT),
+       HWCTX_REGINFO(0, 0x600,   16, INDIRECT_4X),
+       HWCTX_REGINFO(0, 0x603,  128, INDIRECT),
+       HWCTX_REGINFO(0, 0x608,    4, DIRECT),
+       HWCTX_REGINFO(0, 0x60e,    1, DIRECT),
+       HWCTX_REGINFO(0, 0x700,   64, INDIRECT),
+       HWCTX_REGINFO(0, 0x710,   50, DIRECT),
+       HWCTX_REGINFO(1, 0x750,   16, DIRECT),
+       HWCTX_REGINFO(0, 0x800,   16, INDIRECT_4X),
+       HWCTX_REGINFO(0, 0x803,  512, INDIRECT),
+       HWCTX_REGINFO(0, 0x805,   64, INDIRECT),
+       HWCTX_REGINFO(0, 0x820,   32, DIRECT),
+       HWCTX_REGINFO(0, 0x900,   64, INDIRECT),
+       HWCTX_REGINFO(0, 0x902,    2, DIRECT),
+       HWCTX_REGINFO(1, 0x90a,    1, DIRECT),
+       HWCTX_REGINFO(0, 0xa02,   10, DIRECT),
+       HWCTX_REGINFO(1, 0xb04,    1, DIRECT),
+       HWCTX_REGINFO(1, 0xb06,   13, DIRECT),
+};
+
+const struct hwctx_reginfo ctxsave_regs_3d_pergpu[] = {
+       HWCTX_REGINFO(0, 0xe04,    1, DIRECT),
+       HWCTX_REGINFO(0, 0xe2a,    1, DIRECT),
+       HWCTX_REGINFO(1, 0x413,    1, DIRECT),
+       HWCTX_REGINFO(1, 0x90b,    1, DIRECT),
+       HWCTX_REGINFO(1, 0xe41,    1, DIRECT),
+};
+
+struct save_info {
+       u32 *ptr;
+       unsigned int save_count;
+       unsigned int restore_count;
+};
+
+struct ctx_saver {
+       unsigned int version;
+       void (*save_begin)(u32 *ptr);
+       unsigned int save_begin_size;
+       void (*save_direct)(u32 *ptr, u32 start_reg, u32 count);
+       unsigned int save_direct_size;
+       void (*save_indirect)(u32 *ptr, u32 offset_reg, u32 offset,
+                       u32 data_reg, u32 count);
+       unsigned int save_indirect_size;
+       void (*save_end)(u32 *ptr);
+       unsigned int save_end_size;
+       struct nvhost_hwctx *(*ctx3d_alloc)(struct nvhost_channel *ch);
+       void (*ctx3d_save_push)(struct nvhost_cdma *cdma,
+                               struct nvhost_hwctx *ctx);
+       void (*ctx3d_save_service)(struct nvhost_hwctx *ctx);
 };
 
 
 /*** restore ***/
 
-static unsigned int context_restore_size = 0;
+static unsigned int restore_size = 0;
+static unsigned int restore_gpu1_offset = 0;
 
-static void restore_begin(u32 *ptr, u32 waitbase)
+static void restore_begin(u32 *ptr)
 {
        /* set class to host */
        ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
                                        NV_CLASS_HOST_INCR_SYNCPT_BASE, 1);
        /* increment sync point base */
-       ptr[1] = nvhost_class_host_incr_syncpt_base(waitbase, 1);
+       ptr[1] = nvhost_class_host_incr_syncpt_base(NVWAITBASE_3D, 1);
        /* set class to 3D */
        ptr[2] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
        /* program PSEQ_QUAD_ID */
@@ -154,36 +126,282 @@ static void restore_begin(u32 *ptr, u32 waitbase)
 }
 #define RESTORE_BEGIN_SIZE 4
 
-static void restore_end(u32 *ptr, u32 syncpt_id)
+static void restore_direct(u32 *ptr, u32 start_reg, u32 count)
+{
+       ptr[0] = nvhost_opcode_incr(start_reg, count);
+}
+#define RESTORE_DIRECT_SIZE 1
+
+static void restore_indirect(u32 *ptr, u32 offset_reg, u32 offset,
+                       u32 data_reg, u32 count)
+{
+       ptr[0] = nvhost_opcode_imm(offset_reg, offset);
+       ptr[1] = nvhost_opcode_nonincr(data_reg, count);
+}
+#define RESTORE_INDIRECT_SIZE 2
+
+static void restore_end(u32 *ptr)
 {
        /* syncpt increment to track restore gather. */
-       ptr[0] = nvhost_opcode_imm(0x0, ((1UL << 8) | (u8)(syncpt_id & 0xff)));
+       ptr[0] = nvhost_opcode_imm(0x0, ((1UL << 8) | (u8)(NVSYNCPT_3D & 0xff)));
 }
 #define RESTORE_END_SIZE 1
 
-static void restore_direct(u32 *ptr, u32 start_reg, u32 count)
+static u32 *setup_restore_regs_v0(u32 *ptr,
+                       const struct hwctx_reginfo *regs,
+                       unsigned int nr_regs)
 {
-       ptr[0] = nvhost_opcode_incr(start_reg, count);
+       const struct hwctx_reginfo *rend = regs + nr_regs;
+
+       for ( ; regs != rend; ++regs) {
+               u32 offset = regs->offset;
+               u32 count = regs->count;
+               u32 indoff = offset + 1;
+               if (regs->version > 0)
+                       continue;
+               switch (regs->type) {
+               case HWCTX_REGINFO_DIRECT:
+                       restore_direct(ptr, offset, count);
+                       ptr += RESTORE_DIRECT_SIZE;
+                       break;
+               case HWCTX_REGINFO_INDIRECT_4X:
+                       ++indoff;
+                       /* fall through */
+               case HWCTX_REGINFO_INDIRECT:
+                       restore_indirect(ptr, offset, 0, indoff, count);
+                       ptr += RESTORE_INDIRECT_SIZE;
+                       break;
+               }
+               ptr += count;
+       }
+       return ptr;
 }
-#define RESTORE_DIRECT_SIZE 1
 
-static void restore_indoffset(u32 *ptr, u32 offset_reg, u32 offset)
+static void setup_restore_v0(u32 *ptr)
 {
-       ptr[0] = nvhost_opcode_imm(offset_reg, offset);
+       restore_begin(ptr);
+       ptr += RESTORE_BEGIN_SIZE;
+
+       ptr = setup_restore_regs_v0(ptr,
+                       ctxsave_regs_3d_global,
+                       ARRAY_SIZE(ctxsave_regs_3d_global));
+
+       ptr = setup_restore_regs_v0(ptr,
+                       ctxsave_regs_3d_pergpu,
+                       ARRAY_SIZE(ctxsave_regs_3d_pergpu));
+
+       restore_end(ptr);
+
+       wmb();
 }
-#define RESTORE_INDOFFSET_SIZE 1
 
-static void restore_inddata(u32 *ptr, u32 data_reg, u32 count)
+
+/*** save ***/
+
+/* the same context save command sequence is used for all contexts. */
+static struct nvmap_handle_ref *save_buf = NULL;
+static u32 save_phys = 0;
+static unsigned int save_size = 0;
+
+static void __init setup_save_regs(const struct ctx_saver *saver,
+                       struct save_info *info,
+                       const struct hwctx_reginfo *regs,
+                       unsigned int nr_regs)
 {
-       ptr[0] = nvhost_opcode_nonincr(data_reg, count);
+       const struct hwctx_reginfo *rend = regs + nr_regs;
+       u32 *ptr = info->ptr;
+       unsigned int save_count = info->save_count;
+       unsigned int restore_count = info->restore_count;
+
+       for ( ; regs != rend; ++regs) {
+               u32 offset = regs->offset;
+               u32 count = regs->count;
+               u32 indoff = offset + 1;
+               if (regs->version > saver->version)
+                       continue;
+               switch (regs->type) {
+               case HWCTX_REGINFO_DIRECT:
+                       if (ptr) {
+                               saver->save_direct(ptr, offset, count);
+                               ptr += saver->save_direct_size;
+                       }
+                       save_count += saver->save_direct_size;
+                       restore_count += RESTORE_DIRECT_SIZE;
+                       break;
+               case HWCTX_REGINFO_INDIRECT_4X:
+                       ++indoff;
+                       /* fall through */
+               case HWCTX_REGINFO_INDIRECT:
+                       if (ptr) {
+                               saver->save_indirect(ptr, offset, 0,
+                                               indoff, count);
+                               ptr += saver->save_indirect_size;
+                       }
+                       save_count += saver->save_indirect_size;
+                       restore_count += RESTORE_INDIRECT_SIZE;
+                       break;
+               }
+               if (ptr) {
+                       memset(ptr, 0, count * 4);
+                       ptr += count;
+               }
+               save_count += count;
+               restore_count += count;
+       }
+
+       info->ptr = ptr;
+       info->save_count = save_count;
+       info->restore_count = restore_count;
+}
+
+static void __init switch_gpu(struct save_info *info,
+                       unsigned int save_src_gpu,
+                       u32 save_dest_gpus,
+                       u32 restore_dest_gpus)
+{
+       if (info->ptr) {
+               info->ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
+                                               0x905, 1);
+               info->ptr[1] = nvhost_opcode_imm(0xb00, restore_dest_gpus);
+               info->ptr[2] = nvhost_opcode_imm(0xb00, save_dest_gpus);
+               info->ptr[3] = nvhost_opcode_imm(0xb01, save_src_gpu);
+               info->ptr += 4;
+       }
+       info->save_count += 4;
+       info->restore_count += 1;
+}
+
+static void __init setup_save(const struct ctx_saver *saver, u32 *ptr)
+{
+       struct save_info info = {
+               ptr,
+               saver->save_begin_size,
+               RESTORE_BEGIN_SIZE
+       };
+       bool is_sli = (s_nr_gpus == 2);
+       BUG_ON(s_nr_gpus > 2);
+
+       if (info.ptr) {
+               saver->save_begin(info.ptr);
+               info.ptr += saver->save_begin_size;
+       }
+
+       /* read from gpu0, write cmds through gpu0, restore to gpu0+gpu1 */
+       if (is_sli)
+               switch_gpu(&info, 0, 1, 3);
+
+       /* save regs that are common to both gpus */
+       setup_save_regs(saver, &info,
+                       ctxsave_regs_3d_global,
+                       ARRAY_SIZE(ctxsave_regs_3d_global));
+
+       /* read from gpu0, write cmds through gpu0, restore to gpu0 */
+       if (is_sli)
+               switch_gpu(&info, 0, 1, 1);
+
+       /* save gpu0-specific regs */
+       setup_save_regs(saver, &info,
+                       ctxsave_regs_3d_pergpu,
+                       ARRAY_SIZE(ctxsave_regs_3d_pergpu));
+
+       if (is_sli) {
+               /* read from gpu1, write cmds through gpu1, restore to gpu1 */
+               switch_gpu(&info, 1, 2, 2);
+               /* note offset at which gpu 1 restore starts */
+               restore_gpu1_offset = info.restore_count;
+               /* save gpu1-specific regs */
+               setup_save_regs(saver, &info,
+                               ctxsave_regs_3d_pergpu,
+                               ARRAY_SIZE(ctxsave_regs_3d_pergpu));
+       }
+
+       /* read from gpu0, write cmds through gpu1, restore to gpu0+gpu1 */
+       if (is_sli)
+               switch_gpu(&info, 0, 2, 3);
+
+       if (info.ptr) {
+               saver->save_end(info.ptr);
+               info.ptr += saver->save_end_size;
+       }
+
+       wmb();
+
+       save_size = info.save_count + saver->save_end_size;
+       restore_size = info.restore_count + RESTORE_END_SIZE;
+}
+
+
+/*** v0 saver ***/
+
+#define SAVE_SYNCPT_INCRS_V0 3
+#define SAVE_SYNCPT_THRESH_V0 2
+
+static void save_push_v0(struct nvhost_cdma *cdma,
+                       struct nvhost_hwctx *ctx)
+{
+       nvhost_cdma_push(cdma,
+                       nvhost_opcode_gather(save_size),
+                       save_phys);
+}
+
+static void __init save_begin_v0(u32 *ptr)
+{
+       /* 3d: when done, increment syncpt to base+1 */
+       ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
+       ptr[1] = nvhost_opcode_imm(0, 0x100 | NVSYNCPT_3D); /* incr 1 */
+       /* host: wait for syncpt base+1 */
+       ptr[2] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
+                                       NV_CLASS_HOST_WAIT_SYNCPT_BASE, 1);
+       ptr[3] = nvhost_class_host_wait_syncpt_base(NVSYNCPT_3D,
+                                               NVWAITBASE_3D, 1);
+       /* host: signal context read thread to start reading */
+       ptr[4] = nvhost_opcode_imm(0, NVSYNCPT_3D); /* incr 2 */
+}
+#define SAVE_BEGIN_V0_SIZE 5
+
+static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count)
+{
+       ptr[0] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDOFF, 1);
+       ptr[1] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
+                                               start_reg, true);
+       ptr[2] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count);
+}
+#define SAVE_DIRECT_V0_SIZE 3
+
+static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset,
+                       u32 data_reg, u32 count)
+{
+       ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
+                                       offset_reg, 1);
+       ptr[1] = offset;
+       ptr[2] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
+                                       NV_CLASS_HOST_INDOFF, 1);
+       ptr[3] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
+                                               data_reg, false);
+       ptr[4] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count);
+}
+#define SAVE_INDIRECT_V0_SIZE 5
+
+static void __init save_end_v0(u32 *ptr)
+{
+       /* Wait for context read service to finish (cpu incr 3) */
+       ptr[0] = nvhost_opcode_nonincr(NV_CLASS_HOST_WAIT_SYNCPT_BASE, 1);
+       ptr[1] = nvhost_class_host_wait_syncpt_base(NVSYNCPT_3D,
+                                               NVWAITBASE_3D,
+                                               SAVE_SYNCPT_INCRS_V0);
+       /* Advance syncpoint base */
+       ptr[2] = nvhost_opcode_nonincr(NV_CLASS_HOST_INCR_SYNCPT_BASE, 1);
+       ptr[3] = nvhost_class_host_incr_syncpt_base(NVWAITBASE_3D,
+                                               SAVE_SYNCPT_INCRS_V0);
+       /* set class back to the unit */
+       ptr[4] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
 }
-#define RESTORE_INDDATA_SIZE 1
+#define SAVE_END_V0_SIZE 5
 
-static void restore_registers_from_fifo(u32 *ptr, unsigned int count,
-                                       struct nvhost_channel *channel,
+static void save_registers_from_fifo(u32 *ptr, unsigned int count,
+                                       void __iomem *chan_regs,
                                        unsigned int *pending)
 {
-       void __iomem *chan_regs = channel->aperture;
        unsigned int entries = *pending;
        while (count) {
                unsigned int num;
@@ -216,244 +434,226 @@ static void restore_registers_from_fifo(u32 *ptr, unsigned int count,
        *pending = entries;
 }
 
-static void setup_restore(u32 *ptr, u32 waitbase)
+static u32 *save_regs_v0(u32 *ptr, unsigned int *pending,
+                       void __iomem *chan_regs,
+                       const struct hwctx_reginfo *regs,
+                       unsigned int nr_regs)
 {
-       const struct hwctx_reginfo *r;
-       const struct hwctx_reginfo *rend;
+       const struct hwctx_reginfo *rend = regs + nr_regs;
 
-       restore_begin(ptr, waitbase);
-       ptr += RESTORE_BEGIN_SIZE;
-
-       r = ctxsave_regs_3d;
-       rend = ctxsave_regs_3d + ARRAY_SIZE(ctxsave_regs_3d);
-       for ( ; r != rend; ++r) {
-               u32 offset = r->offset;
-               u32 count = r->count;
-               switch (r->type) {
+       for ( ; regs != rend; ++regs) {
+               u32 count = regs->count;
+               if (regs->version > 0)
+                       continue;
+               switch (regs->type) {
                case HWCTX_REGINFO_DIRECT:
-                       restore_direct(ptr, offset, count);
                        ptr += RESTORE_DIRECT_SIZE;
                        break;
                case HWCTX_REGINFO_INDIRECT:
-                       restore_indoffset(ptr, offset, 0);
-                       ptr += RESTORE_INDOFFSET_SIZE;
-                       restore_inddata(ptr, offset + 1, count);
-                       ptr += RESTORE_INDDATA_SIZE;
-                       break;
-               case HWCTX_REGINFO_INDIRECT_OFFSET:
-                       restore_indoffset(ptr, offset, count);
-                       ptr += RESTORE_INDOFFSET_SIZE;
-                       continue; /* INDIRECT_DATA follows with real count */
-               case HWCTX_REGINFO_INDIRECT_DATA:
-                       restore_inddata(ptr, offset, count);
-                       ptr += RESTORE_INDDATA_SIZE;
+               case HWCTX_REGINFO_INDIRECT_4X:
+                       ptr += RESTORE_INDIRECT_SIZE;
                        break;
                }
+               save_registers_from_fifo(ptr, count, chan_regs, pending);
                ptr += count;
        }
-
-       restore_end(ptr, NVSYNCPT_3D);
-       wmb();
+       return ptr;
 }
 
-/*** save ***/
 
-/* the same context save command sequence is used for all contexts. */
-static struct nvmap_handle_ref *context_save_buf = NULL;
-static u32 context_save_phys = 0;
-static u32 *context_save_ptr = NULL;
-static unsigned int context_save_size = 0;
+/*** v1 saver ***/
+
+#define SAVE_SYNCPT_INCRS_V1 2
+#define SAVE_SYNCPT_THRESH_V1 2
 
-static void save_begin(u32 *ptr, u32 syncpt_id, u32 waitbase)
+static void save_push_v1(struct nvhost_cdma *cdma,
+                       struct nvhost_hwctx *ctx)
 {
-       /* set class to the unit to flush */
-       ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
-       /*
-        * Flush pipe and signal context read thread to start reading
-        * sync point increment
-        */
-       ptr[1] = nvhost_opcode_imm(0, 0x100 | syncpt_id);
-       ptr[2] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
-                                       NV_CLASS_HOST_WAIT_SYNCPT_BASE, 1);
-       /* wait for base+1 */
-       ptr[3] = nvhost_class_host_wait_syncpt_base(syncpt_id, waitbase, 1);
-       ptr[4] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
-       ptr[5] = nvhost_opcode_imm(0, syncpt_id);
-       ptr[6] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, 0, 0);
+       /* wait for 3d idle */
+       nvhost_cdma_push(cdma,
+                       nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0),
+                       nvhost_opcode_imm(0, 0x100 | NVSYNCPT_3D)); /* incr 1 */
+       nvhost_cdma_push(cdma,
+                       nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
+                                       NV_CLASS_HOST_WAIT_SYNCPT_BASE, 1),
+                       nvhost_class_host_wait_syncpt_base(NVSYNCPT_3D,
+                                                       NVWAITBASE_3D, 1));
+       /* back to 3d */
+       nvhost_cdma_push(cdma,
+                       nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0),
+                       NVHOST_OPCODE_NOOP);
+       /* set gpu1 and gpu0's register read memory output addresses,
+          and send their reads to memory */
+       if (s_nr_gpus == 2) {
+               nvhost_cdma_push(cdma,
+                               nvhost_opcode_imm(0xb00, 2),
+                               nvhost_opcode_imm(0xe40, 1));
+               nvhost_cdma_push(cdma,
+                               nvhost_opcode_nonincr(0x904, 1),
+                               ctx->restore_phys + restore_gpu1_offset * 4);
+       }
+       nvhost_cdma_push(cdma,
+                       nvhost_opcode_imm(0xb00, 1),
+                       nvhost_opcode_imm(0xe40, 1));
+       nvhost_cdma_push(cdma,
+                       nvhost_opcode_nonincr(0x904, 1),
+                       ctx->restore_phys);
+       /* gather the save buffer */
+       nvhost_cdma_push(cdma,
+                       nvhost_opcode_gather(save_size),
+                       save_phys);
 }
-#define SAVE_BEGIN_SIZE 7
 
-static void save_direct(u32 *ptr, u32 start_reg, u32 count)
+static void __init save_begin_v1(u32 *ptr)
 {
-       ptr[0] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDOFF, 1);
-       ptr[1] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
-                                               start_reg, true);
-       ptr[2] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count);
+       ptr[0] = nvhost_opcode_nonincr(0x905, RESTORE_BEGIN_SIZE);
+       restore_begin(ptr + 1);
+       ptr += RESTORE_BEGIN_SIZE;
 }
-#define SAVE_DIRECT_SIZE 3
+#define SAVE_BEGIN_V1_SIZE (1 + RESTORE_BEGIN_SIZE)
 
-static void save_indoffset(u32 *ptr, u32 offset_reg, u32 offset)
+static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count)
 {
-       ptr[0] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDOFF, 1);
-       ptr[1] = nvhost_class_host_indoff_reg_write(NV_HOST_MODULE_GR3D,
-                                               offset_reg, true);
-       ptr[2] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, 1);
-       ptr[3] = offset;
+#if RESTORE_DIRECT_SIZE != 1
+#error whoops! code is optimized for RESTORE_DIRECT_SIZE == 1
+#endif
+       ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0x905, 1);
+       restore_direct(ptr + 1, start_reg, count);
+       ptr += RESTORE_DIRECT_SIZE;
+       ptr[1] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
+                                       NV_CLASS_HOST_INDOFF, 1);
+       ptr[2] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
+                                               start_reg, true);
+       /* TODO could do this in the setclass if count < 6 */
+       ptr[3] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count);
 }
-#define SAVE_INDOFFSET_SIZE 4
+#define SAVE_DIRECT_V1_SIZE (4 + RESTORE_DIRECT_SIZE)
 
-static inline void save_inddata(u32 *ptr, u32 data_reg, u32 count)
+static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset,
+                       u32 data_reg, u32 count)
 {
-       ptr[0] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDOFF, 1);
-       ptr[1] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
+       ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
+       ptr[1] = nvhost_opcode_nonincr(0x905, RESTORE_INDIRECT_SIZE);
+       restore_indirect(ptr + 2, offset_reg, offset, data_reg, count);
+       ptr += RESTORE_INDIRECT_SIZE;
+       ptr[2] = nvhost_opcode_imm(offset_reg, offset);
+       ptr[3] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
+                                       NV_CLASS_HOST_INDOFF, 1);
+       ptr[4] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
                                                data_reg, false);
-       ptr[2] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count);
+       ptr[5] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count);
 }
-#define SAVE_INDDDATA_SIZE 3
+#define SAVE_INDIRECT_V1_SIZE (6 + RESTORE_INDIRECT_SIZE)
 
-static void save_end(u32 *ptr, u32 syncpt_id, u32 waitbase)
+static void __init save_end_v1(u32 *ptr)
 {
-       /* Wait for context read service */
-       ptr[0] = nvhost_opcode_nonincr(NV_CLASS_HOST_WAIT_SYNCPT_BASE, 1);
-       ptr[1] = nvhost_class_host_wait_syncpt_base(syncpt_id, waitbase, 3);
-       /* Increment syncpoint base */
-       ptr[2] = nvhost_opcode_nonincr(NV_CLASS_HOST_INCR_SYNCPT_BASE, 1);
-       ptr[3] = nvhost_class_host_incr_syncpt_base(waitbase, 3);
-       /* set class back to the unit */
-       ptr[4] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
+#if RESTORE_END_SIZE != 1
+#error whoops! code is optimized for RESTORE_END_SIZE == 1
+#endif
+       /* write end of restore buffer */
+       ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0x905, 1);
+       restore_end(ptr + 1);
+       ptr += RESTORE_END_SIZE;
+       /* reset to sli if necessary */
+       ptr[1] = nvhost_opcode_imm(0xb00, (1 << s_nr_gpus) - 1);
+       /* op_done syncpt incr to flush FDC (and release any waiters) */
+       ptr[2] = nvhost_opcode_imm(0, 0x100 | NVSYNCPT_3D); /* incr 2 */
+       /* host wait for that syncpt incr, and advance the wait base */
+       ptr[3] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
+                                       NV_CLASS_HOST_WAIT_SYNCPT_BASE,
+                                       nvhost_mask2(
+                                               NV_CLASS_HOST_WAIT_SYNCPT_BASE,
+                                               NV_CLASS_HOST_INCR_SYNCPT_BASE));
+       ptr[4] = nvhost_class_host_wait_syncpt_base(NVSYNCPT_3D,
+                                               NVWAITBASE_3D, 2);
+       ptr[5] = nvhost_class_host_incr_syncpt_base(NVWAITBASE_3D,
+                                               SAVE_SYNCPT_INCRS_V1);
+       /* set class back to 3d */
+       ptr[6] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
+       /* send reg reads back to host */
+       ptr[7] = nvhost_opcode_imm(0xe40, 0);
 }
-#define SAVE_END_SIZE 5
-
-static void __init setup_save(
-       u32 *ptr, unsigned int *words_save, unsigned int *words_restore,
-       u32 syncpt_id, u32 waitbase)
-{
-       const struct hwctx_reginfo *r;
-       const struct hwctx_reginfo *rend;
-       unsigned int save = SAVE_BEGIN_SIZE + SAVE_END_SIZE;
-       unsigned int restore = RESTORE_BEGIN_SIZE + RESTORE_END_SIZE;
-
-       if (ptr) {
-               save_begin(ptr, syncpt_id, waitbase);
-               ptr += SAVE_BEGIN_SIZE;
-       }
-
-       r = ctxsave_regs_3d;
-       rend = ctxsave_regs_3d + ARRAY_SIZE(ctxsave_regs_3d);
-       for ( ; r != rend; ++r) {
-               u32 offset = r->offset;
-               u32 count = r->count;
-               switch (r->type) {
-               case HWCTX_REGINFO_DIRECT:
-                       if (ptr) {
-                               save_direct(ptr, offset, count);
-                               ptr += SAVE_DIRECT_SIZE;
-                       }
-                       save += SAVE_DIRECT_SIZE;
-                       restore += RESTORE_DIRECT_SIZE;
-                       break;
-               case HWCTX_REGINFO_INDIRECT:
-                       if (ptr) {
-                               save_indoffset(ptr, offset, 0);
-                               ptr += SAVE_INDOFFSET_SIZE;
-                               save_inddata(ptr, offset + 1, count);
-                               ptr += SAVE_INDDDATA_SIZE;
-                       }
-                       save += SAVE_INDOFFSET_SIZE;
-                       restore += RESTORE_INDOFFSET_SIZE;
-                       save += SAVE_INDDDATA_SIZE;
-                       restore += RESTORE_INDDATA_SIZE;
-                       break;
-               case HWCTX_REGINFO_INDIRECT_OFFSET:
-                       if (ptr) {
-                               save_indoffset(ptr, offset, count);
-                               ptr += SAVE_INDOFFSET_SIZE;
-                       }
-                       save += SAVE_INDOFFSET_SIZE;
-                       restore += RESTORE_INDOFFSET_SIZE;
-                       continue; /* INDIRECT_DATA follows with real count */
-               case HWCTX_REGINFO_INDIRECT_DATA:
-                       if (ptr) {
-                               save_inddata(ptr, offset, count);
-                               ptr += SAVE_INDDDATA_SIZE;
-                       }
-                       save += SAVE_INDDDATA_SIZE;
-                       restore += RESTORE_INDDATA_SIZE;
-                       break;
-               }
-               if (ptr) {
-                       memset(ptr, 0, count * 4);
-                       ptr += count;
-               }
-               save += count;
-               restore += count;
-       }
+#define SAVE_END_V1_SIZE (8 + RESTORE_END_SIZE)
 
-       if (ptr)
-               save_end(ptr, syncpt_id, waitbase);
-
-       if (words_save)
-               *words_save = save;
-       if (words_restore)
-               *words_restore = restore;
-       wmb();
-}
 
 /*** ctx3d ***/
 
-static struct nvhost_hwctx *ctx3d_alloc(struct nvhost_channel *ch)
+static struct nvhost_hwctx *ctx3d_alloc_common(struct nvhost_channel *ch,
+                                       u32 save_incrs, u32 save_thresh,
+                                       bool map_restore)
 {
-       struct nvhost_hwctx *ctx;
        struct nvmap_client *nvmap = ch->dev->nvmap;
+       struct nvhost_hwctx *ctx;
 
        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return NULL;
-       ctx->restore = nvmap_alloc(nvmap, context_restore_size * 4, 32,
-                                  NVMAP_HANDLE_WRITE_COMBINE);
-
+       ctx->restore = nvmap_alloc(nvmap, restore_size * 4, 32,
+               map_restore ? NVMAP_HANDLE_WRITE_COMBINE
+                           : NVMAP_HANDLE_UNCACHEABLE);
        if (IS_ERR_OR_NULL(ctx->restore)) {
                kfree(ctx);
                return NULL;
        }
 
-       ctx->save_cpu_data = nvmap_mmap(ctx->restore);
-       if (!ctx->save_cpu_data) {
-               nvmap_free(nvmap, ctx->restore);
-               kfree(ctx);
-               return NULL;
+       if (map_restore) {
+               ctx->restore_virt = nvmap_mmap(ctx->restore);
+               if (!ctx->restore_virt) {
+                       nvmap_free(nvmap, ctx->restore);
+                       kfree(ctx);
+                       return NULL;
+               }
+       } else {
+               ctx->restore_virt = NULL;
        }
 
-       setup_restore(ctx->save_cpu_data, NVWAITBASE_3D);
+       kref_init(&ctx->ref);
        ctx->channel = ch;
+       ctx->valid = false;
+       ctx->save = save_buf;
+       ctx->save_incrs = save_incrs;
+       ctx->save_thresh = save_thresh;
        ctx->restore_phys = nvmap_pin(nvmap, ctx->restore);
-       ctx->restore_size = context_restore_size;
-       ctx->save = context_save_buf;
-       ctx->save_phys = context_save_phys;
-       ctx->save_size = context_save_size;
-       ctx->save_incrs = 3;
+       ctx->restore_size = restore_size;
        ctx->restore_incrs = 1;
-       ctx->valid = false;
-       kref_init(&ctx->ref);
        return ctx;
 }
 
+static struct nvhost_hwctx *ctx3d_alloc_v0(struct nvhost_channel *ch)
+{
+       struct nvhost_hwctx *ctx = ctx3d_alloc_common(ch,
+                                               SAVE_SYNCPT_INCRS_V0,
+                                               SAVE_SYNCPT_THRESH_V0,
+                                               true);
+       if (ctx)
+               setup_restore_v0(ctx->restore_virt);
+       return ctx;
+}
+
+static struct nvhost_hwctx *ctx3d_alloc_v1(struct nvhost_channel *ch)
+{
+       return ctx3d_alloc_common(ch,
+                               SAVE_SYNCPT_INCRS_V1,
+                               SAVE_SYNCPT_THRESH_V1, false);
+}
+
+static void ctx3d_get(struct nvhost_hwctx *ctx)
+{
+       kref_get(&ctx->ref);
+}
+
 static void ctx3d_free(struct kref *ref)
 {
        struct nvhost_hwctx *ctx = container_of(ref, struct nvhost_hwctx, ref);
        struct nvmap_client *nvmap = ctx->channel->dev->nvmap;
 
-       nvmap_munmap(ctx->restore, ctx->save_cpu_data);
+       if (ctx->restore_virt)
+               nvmap_munmap(ctx->restore, ctx->restore_virt);
        nvmap_unpin(nvmap, ctx->restore);
        nvmap_free(nvmap, ctx->restore);
        kfree(ctx);
 }
 
-static void ctx3d_get(struct nvhost_hwctx *ctx)
-{
-       kref_get(&ctx->ref);
-}
-
 static void ctx3d_put(struct nvhost_hwctx *ctx)
 {
        kref_put(&ctx->ref, ctx3d_free);
@@ -461,78 +661,94 @@ static void ctx3d_put(struct nvhost_hwctx *ctx)
 
 static void ctx3d_save_service(struct nvhost_hwctx *ctx)
 {
-       const struct hwctx_reginfo *r;
-       const struct hwctx_reginfo *rend;
+       u32 *ptr = (u32 *)ctx->restore_virt + RESTORE_BEGIN_SIZE;
        unsigned int pending = 0;
-       u32 *ptr = (u32 *)ctx->save_cpu_data + RESTORE_BEGIN_SIZE;
 
-       BUG_ON(!ctx->save_cpu_data);
+       ptr = save_regs_v0(ptr, &pending, ctx->channel->aperture,
+                       ctxsave_regs_3d_global,
+                       ARRAY_SIZE(ctxsave_regs_3d_global));
 
-       r = ctxsave_regs_3d;
-       rend = ctxsave_regs_3d + ARRAY_SIZE(ctxsave_regs_3d);
-       for ( ; r != rend; ++r) {
-               u32 count = r->count;
-               switch (r->type) {
-               case HWCTX_REGINFO_DIRECT:
-                       ptr += RESTORE_DIRECT_SIZE;
-                       break;
-               case HWCTX_REGINFO_INDIRECT:
-                       ptr += RESTORE_INDOFFSET_SIZE + RESTORE_INDDATA_SIZE;
-                       break;
-               case HWCTX_REGINFO_INDIRECT_OFFSET:
-                       ptr += RESTORE_INDOFFSET_SIZE;
-                       continue; /* INDIRECT_DATA follows with real count */
-               case HWCTX_REGINFO_INDIRECT_DATA:
-                       ptr += RESTORE_INDDATA_SIZE;
-                       break;
-               }
-               restore_registers_from_fifo(ptr, count, ctx->channel, &pending);
-               ptr += count;
-       }
-
-       BUG_ON((u32)((ptr + RESTORE_END_SIZE) - (u32*)ctx->save_cpu_data)
-               != context_restore_size);
+       ptr = save_regs_v0(ptr, &pending, ctx->channel->aperture,
+                       ctxsave_regs_3d_pergpu,
+                       ARRAY_SIZE(ctxsave_regs_3d_pergpu));
 
        wmb();
        nvhost_syncpt_cpu_incr(&ctx->channel->dev->syncpt, NVSYNCPT_3D);
 }
 
 
+/*** savers ***/
+
+static const struct ctx_saver v0_saver __initconst = {
+       .version = 0,
+       .save_begin = save_begin_v0,
+       .save_begin_size = SAVE_BEGIN_V0_SIZE,
+       .save_direct = save_direct_v0,
+       .save_direct_size = SAVE_DIRECT_V0_SIZE,
+       .save_indirect = save_indirect_v0,
+       .save_indirect_size = SAVE_INDIRECT_V0_SIZE,
+       .save_end = save_end_v0,
+       .save_end_size = SAVE_END_V0_SIZE,
+       .ctx3d_alloc = ctx3d_alloc_v0,
+       .ctx3d_save_push = save_push_v0,
+       .ctx3d_save_service = ctx3d_save_service
+};
+
+static const struct ctx_saver v1_saver __initconst = {
+       .version = 1,
+       .save_begin = save_begin_v1,
+       .save_begin_size = SAVE_BEGIN_V1_SIZE,
+       .save_direct = save_direct_v1,
+       .save_direct_size = SAVE_DIRECT_V1_SIZE,
+       .save_indirect = save_indirect_v1,
+       .save_indirect_size = SAVE_INDIRECT_V1_SIZE,
+       .save_end = save_end_v1,
+       .save_end_size = SAVE_END_V1_SIZE,
+       .ctx3d_alloc = ctx3d_alloc_v1,
+       .ctx3d_save_push = save_push_v1,
+       .ctx3d_save_service = NULL
+};
+
+
 /*** nvhost_3dctx ***/
 
 int __init nvhost_3dctx_handler_init(struct nvhost_hwctx_handler *h)
 {
+       const struct ctx_saver *saver = s_is_v1 ? &v1_saver : &v0_saver;
        struct nvhost_channel *ch;
        struct nvmap_client *nvmap;
+       u32 *save_ptr;
 
        ch = container_of(h, struct nvhost_channel, ctxhandler);
        nvmap = ch->dev->nvmap;
 
-       setup_save(NULL, &context_save_size, &context_restore_size, 0, 0);
+       setup_save(saver, NULL);
 
-       context_save_buf = nvmap_alloc(nvmap, context_save_size * 4, 32,
-                                      NVMAP_HANDLE_WRITE_COMBINE);
-
-       if (IS_ERR(context_save_buf)) {
-               int err = PTR_ERR(context_save_buf);
-               context_save_buf = NULL;
+       save_buf = nvmap_alloc(nvmap, save_size * 4, 32,
+                               NVMAP_HANDLE_WRITE_COMBINE);
+       if (IS_ERR(save_buf)) {
+               int err = PTR_ERR(save_buf);
+               save_buf = NULL;
                return err;
        }
 
-       context_save_ptr = nvmap_mmap(context_save_buf);
-       if (!context_save_ptr) {
-               nvmap_free(nvmap, context_save_buf);
-               context_save_buf = NULL;
+       save_ptr = nvmap_mmap(save_buf);
+       if (!save_ptr) {
+               nvmap_free(nvmap, save_buf);
+               save_buf = NULL;
                return -ENOMEM;
        }
 
-       context_save_phys = nvmap_pin(nvmap, context_save_buf);
-       setup_save(context_save_ptr, NULL, NULL, NVSYNCPT_3D, NVWAITBASE_3D);
+       save_phys = nvmap_pin(nvmap, save_buf);
+
+       setup_save(saver, save_ptr);
 
-       h->alloc = ctx3d_alloc;
+       h->alloc = saver->ctx3d_alloc;
+       h->save_push = saver->ctx3d_save_push;
+       h->save_service = saver->ctx3d_save_service;
        h->get = ctx3d_get;
        h->put = ctx3d_put;
-       h->save_service = ctx3d_save_service;
+
        return 0;
 }
 
index b4e49a0..71695cf 100644 (file)
@@ -172,58 +172,130 @@ void nvhost_channel_suspend(struct nvhost_channel *ch)
        mutex_unlock(&ch->reflock);
 }
 
-void nvhost_channel_submit(struct nvhost_channel *ch,
+int nvhost_channel_submit(struct nvhost_channel *channel,
+                       struct nvhost_hwctx *hwctx,
                        struct nvmap_client *user_nvmap,
-                       struct nvhost_op_pair *ops, int num_pairs,
-                       struct nvhost_cpuinterrupt *intrs, int num_intrs,
-                       struct nvmap_handle **unpins, int num_unpins,
-                       u32 syncpt_id, u32 syncpt_val,
-                       int num_nulled_incrs)
+                       u32 *gather,
+                       u32 *gather_end,
+                       struct nvmap_handle **unpins,
+                       int nr_unpins,
+                       u32 syncpt_id,
+                       u32 syncpt_incrs,
+                       u32 *syncpt_value,
+                       bool null_kickoff)
 {
-       int i;
-       struct nvhost_op_pair* p;
+       struct nvhost_hwctx *hwctx_to_save = NULL;
+       u32 user_syncpt_incrs = syncpt_incrs;
+       bool need_restore = false;
+       u32 syncval;
+       int err;
 
-       /* schedule interrupts */
-       for (i = 0; i < num_intrs; i++) {
-               nvhost_intr_add_action(&ch->dev->intr, syncpt_id, intrs[i].syncpt_val,
-                               NVHOST_INTR_ACTION_CTXSAVE, intrs[i].intr_data, NULL);
+       /* keep module powered */
+       nvhost_module_busy(&channel->mod);
+
+       /* get submit lock */
+       err = mutex_lock_interruptible(&channel->submitlock);
+       if (err) {
+               nvhost_module_idle(&channel->mod);
+               return err;
+       }
+
+       /* context switch */
+       if (channel->cur_ctx != hwctx) {
+               hwctx_to_save = channel->cur_ctx;
+               if (hwctx_to_save) {
+                       syncpt_incrs += hwctx_to_save->save_incrs;
+                       hwctx_to_save->valid = true;
+                       channel->ctxhandler.get(hwctx_to_save);
+               }
+               channel->cur_ctx = hwctx;
+               if (channel->cur_ctx && channel->cur_ctx->valid) {
+                       need_restore = true;
+                       syncpt_incrs += channel->cur_ctx->restore_incrs;
+               }
        }
 
+       /* get absolute sync value */
+       if (BIT(syncpt_id) & NVSYNCPTS_CLIENT_MANAGED)
+               syncval = nvhost_syncpt_set_max(&channel->dev->syncpt,
+                                               syncpt_id, syncpt_incrs);
+       else
+               syncval = nvhost_syncpt_incr_max(&channel->dev->syncpt,
+                                               syncpt_id, syncpt_incrs);
+
        /* begin a CDMA submit */
-       nvhost_cdma_begin(&ch->cdma);
+       nvhost_cdma_begin(&channel->cdma);
+
+       /* push save buffer (pre-gather setup depends on unit) */
+       if (hwctx_to_save)
+               channel->ctxhandler.save_push(&channel->cdma, hwctx_to_save);
 
-       /* push ops */
-       for (i = 0, p = ops; i < num_pairs; i++, p++)
-               nvhost_cdma_push(&ch->cdma, p->op1, p->op2);
+       /* gather restore buffer */
+       if (need_restore)
+               nvhost_cdma_push(&channel->cdma,
+                       nvhost_opcode_gather(channel->cur_ctx->restore_size),
+                       channel->cur_ctx->restore_phys);
 
-       /* extra work to do for null kickoff */
-       if (num_nulled_incrs) {
-               u32 incr;
+       /* add a setclass for modules that require it (unless ctxsw added it) */
+       if (!hwctx_to_save && !need_restore && channel->desc->class)
+               nvhost_cdma_push(&channel->cdma,
+                       nvhost_opcode_setclass(channel->desc->class, 0, 0),
+                       NVHOST_OPCODE_NOOP);
+
+       if (null_kickoff) {
+               int incr;
                u32 op_incr;
 
                /* TODO ideally we'd also perform host waits here */
 
                /* push increments that correspond to nulled out commands */
                op_incr = nvhost_opcode_imm(0, 0x100 | syncpt_id);
-               for (incr = 0; incr < (num_nulled_incrs >> 1); incr++)
-                       nvhost_cdma_push(&ch->cdma, op_incr, op_incr);
-               if (num_nulled_incrs & 1)
-                       nvhost_cdma_push(&ch->cdma, op_incr, NVHOST_OPCODE_NOOP);
+               for (incr = 0; incr < (user_syncpt_incrs >> 1); incr++)
+                       nvhost_cdma_push(&channel->cdma, op_incr, op_incr);
+               if (user_syncpt_incrs & 1)
+                       nvhost_cdma_push(&channel->cdma,
+                                       op_incr, NVHOST_OPCODE_NOOP);
 
                /* for 3d, waitbase needs to be incremented after each submit */
-               if (ch->desc->class == NV_GRAPHICS_3D_CLASS_ID) {
-                       u32 op1 = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
-                                       NV_CLASS_HOST_INCR_SYNCPT_BASE, 1);
-                       u32 op2 = nvhost_class_host_incr_syncpt_base(NVWAITBASE_3D,
-                                       num_nulled_incrs);
-
-                       nvhost_cdma_push(&ch->cdma, op1, op2);
-               }
+               if (channel->desc->class == NV_GRAPHICS_3D_CLASS_ID)
+                       nvhost_cdma_push(&channel->cdma,
+                                       nvhost_opcode_setclass(
+                                               NV_HOST1X_CLASS_ID,
+                                               NV_CLASS_HOST_INCR_SYNCPT_BASE,
+                                               1),
+                                       nvhost_class_host_incr_syncpt_base(
+                                               NVWAITBASE_3D,
+                                               user_syncpt_incrs));
+       }
+       else {
+               /* push user gathers */
+               for ( ; gather != gather_end; gather += 2)
+                       nvhost_cdma_push(&channel->cdma,
+                                       nvhost_opcode_gather(gather[0]),
+                                       gather[1]);
        }
 
-       /* end CDMA submit & stash pinned hMems into sync queue for later cleanup */
-       nvhost_cdma_end(&ch->cdma, user_nvmap, syncpt_id, syncpt_val,
-                       unpins, num_unpins);
+       /* end CDMA submit & stash pinned hMems into sync queue */
+       nvhost_cdma_end(&channel->cdma, user_nvmap,
+                       syncpt_id, syncval, unpins, nr_unpins);
+
+       /*
+        * schedule a context save interrupt (to drain the host FIFO
+        * if necessary, and to release the restore buffer)
+        */
+       if (hwctx_to_save)
+               nvhost_intr_add_action(&channel->dev->intr, syncpt_id,
+                       syncval - syncpt_incrs + hwctx_to_save->save_thresh,
+                       NVHOST_INTR_ACTION_CTXSAVE, hwctx_to_save, NULL);
+
+       /* schedule a submit complete interrupt */
+       nvhost_intr_add_action(&channel->dev->intr, syncpt_id, syncval,
+                       NVHOST_INTR_ACTION_SUBMIT_COMPLETE, channel, NULL);
+
+       mutex_unlock(&channel->submitlock);
+
+       *syncpt_value = syncval;
+       return 0;
 }
 
 static void power_2d(struct nvhost_module *mod, enum nvhost_power_action action)
@@ -237,42 +309,48 @@ static void power_2d(struct nvhost_module *mod, enum nvhost_power_action action)
 static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action)
 {
        struct nvhost_channel *ch = container_of(mod, struct nvhost_channel, mod);
+       struct nvhost_hwctx *hwctx_to_save;
+       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+       u32 syncpt_incrs, syncpt_val;
+       void *ref;
 
-       if (action == NVHOST_POWER_ACTION_OFF) {
-               mutex_lock(&ch->submitlock);
-               if (ch->cur_ctx) {
-                       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-                       struct nvhost_op_pair save;
-                       struct nvhost_cpuinterrupt ctxsw;
-                       u32 syncval;
-                       void *ref;
-                       syncval = nvhost_syncpt_incr_max(&ch->dev->syncpt,
-                                                       NVSYNCPT_3D,
-                                                       ch->cur_ctx->save_incrs);
-                       save.op1 = nvhost_opcode_gather(0, ch->cur_ctx->save_size);
-                       save.op2 = ch->cur_ctx->save_phys;
-                       ctxsw.intr_data = ch->cur_ctx;
-                       ctxsw.syncpt_val = syncval - 1;
-                       ch->cur_ctx->valid = true;
-                       ch->ctxhandler.get(ch->cur_ctx);
-                       ch->cur_ctx = NULL;
+       if (action != NVHOST_POWER_ACTION_OFF)
+               return;
 
-                       nvhost_channel_submit(ch, ch->dev->nvmap,
-                                             &save, 1, &ctxsw, 1, NULL, 0,
-                                             NVSYNCPT_3D, syncval, 0);
-
-                       nvhost_intr_add_action(&ch->dev->intr, NVSYNCPT_3D,
-                                              syncval,
-                                              NVHOST_INTR_ACTION_WAKEUP,
-                                              &wq, &ref);
-                       wait_event(wq,
-                                  nvhost_syncpt_min_cmp(&ch->dev->syncpt,
-                                                        NVSYNCPT_3D, syncval));
-                       nvhost_intr_put_ref(&ch->dev->intr, ref);
-                       nvhost_cdma_update(&ch->cdma);
-               }
+       mutex_lock(&ch->submitlock);
+       hwctx_to_save = ch->cur_ctx;
+       if (!hwctx_to_save) {
                mutex_unlock(&ch->submitlock);
+               return;
        }
+
+       hwctx_to_save->valid = true;
+       ch->ctxhandler.get(hwctx_to_save);
+       ch->cur_ctx = NULL;
+
+       syncpt_incrs = hwctx_to_save->save_incrs;
+       syncpt_val = nvhost_syncpt_incr_max(&ch->dev->syncpt,
+                                       NVSYNCPT_3D, syncpt_incrs);
+
+       nvhost_cdma_begin(&ch->cdma);
+       ch->ctxhandler.save_push(&ch->cdma, hwctx_to_save);
+       nvhost_cdma_end(&ch->cdma, ch->dev->nvmap, NVSYNCPT_3D, syncpt_val, NULL, 0);
+
+       nvhost_intr_add_action(&ch->dev->intr, NVSYNCPT_3D,
+                       syncpt_val - syncpt_incrs + hwctx_to_save->save_thresh,
+                       NVHOST_INTR_ACTION_CTXSAVE, hwctx_to_save, NULL);
+
+       nvhost_intr_add_action(&ch->dev->intr, NVSYNCPT_3D, syncpt_val,
+                       NVHOST_INTR_ACTION_WAKEUP, &wq, &ref);
+       wait_event(wq,
+               nvhost_syncpt_min_cmp(&ch->dev->syncpt,
+                               NVSYNCPT_3D, syncpt_val));
+
+       nvhost_intr_put_ref(&ch->dev->intr, ref);
+
+       nvhost_cdma_update(&ch->cdma);
+
+       mutex_unlock(&ch->submitlock);
 }
 
 static void power_mpe(struct nvhost_module *mod, enum nvhost_power_action action)
index cb117b5..41c6bdd 100644 (file)
@@ -76,13 +76,18 @@ int nvhost_channel_init(
        struct nvhost_channel *ch,
        struct nvhost_master *dev, int index);
 
-void nvhost_channel_submit(struct nvhost_channel *ch,
-                       struct nvmap_client *user_nvmap,
-                       struct nvhost_op_pair *ops, int num_pairs,
-                       struct nvhost_cpuinterrupt *intrs, int num_intrs,
-                       struct nvmap_handle **unpins, int num_unpins,
-                       u32 syncpt_id, u32 syncpt_val,
-                       int num_nulled_incrs);
+int nvhost_channel_submit(
+       struct nvhost_channel *channel,
+       struct nvhost_hwctx *hwctx,
+       struct nvmap_client *user_nvmap,
+       u32 *gather,
+       u32 *gather_end,
+       struct nvmap_handle **unpins,
+       int nr_unpins,
+       u32 syncpt_id,
+       u32 syncpt_incrs,
+       u32 *syncpt_value,
+       bool null_kickoff);
 
 struct nvhost_channel *nvhost_getchannel(struct nvhost_channel *ch);
 void nvhost_putchannel(struct nvhost_channel *ch, struct nvhost_hwctx *ctx);
index 7a5f4ce..4e7f968 100644 (file)
@@ -209,9 +209,9 @@ static inline u32 nvhost_opcode_restart(unsigned address)
        return (5 << 28) | (address >> 4);
 }
 
-static inline u32 nvhost_opcode_gather(unsigned offset, unsigned count)
+static inline u32 nvhost_opcode_gather(unsigned count)
 {
-       return (6 << 28) | (offset << 16) | count;
+       return (6 << 28) | count;
 }
 
 static inline u32 nvhost_opcode_gather_nonincr(unsigned offset,        unsigned count)
@@ -226,5 +226,10 @@ static inline u32 nvhost_opcode_gather_incr(unsigned offset, unsigned count)
 
 #define NVHOST_OPCODE_NOOP nvhost_opcode_nonincr(0, 0)
 
+static inline u32 nvhost_mask2(unsigned x, unsigned y)
+{
+       return 1 | (1 << (y - x));
+}
+
 #endif /* __NVHOST_HARDWARE_H */
 
index 4fce8d1..ec0c955 100644 (file)
@@ -30,6 +30,7 @@
 #include <mach/nvmap.h>
 
 struct nvhost_channel;
+struct nvhost_cdma;
 
 struct nvhost_hwctx {
        struct kref ref;
@@ -38,12 +39,11 @@ struct nvhost_hwctx {
        bool valid;
 
        struct nvmap_handle_ref *save;
-       u32 save_phys;
-       u32 save_size;
        u32 save_incrs;
-       void *save_cpu_data;
+       u32 save_thresh;
 
        struct nvmap_handle_ref *restore;
+       u32 *restore_virt;
        u32 restore_phys;
        u32 restore_size;
        u32 restore_incrs;
@@ -53,14 +53,16 @@ struct nvhost_hwctx_handler {
        struct nvhost_hwctx * (*alloc) (struct nvhost_channel *ch);
        void (*get) (struct nvhost_hwctx *ctx);
        void (*put) (struct nvhost_hwctx *ctx);
+       void (*save_push) (struct nvhost_cdma *cdma, struct nvhost_hwctx *ctx);
        void (*save_service) (struct nvhost_hwctx *ctx);
 };
 
 int nvhost_3dctx_handler_init(struct nvhost_hwctx_handler *h);
 int nvhost_mpectx_handler_init(struct nvhost_hwctx_handler *h);
 
-static inline int nvhost_hwctx_handler_init(struct nvhost_hwctx_handler *h,
-                                            const char *module)
+static inline int nvhost_hwctx_handler_init(
+       struct nvhost_hwctx_handler *h,
+       const char *module)
 {
        if (strcmp(module, "gr3d") == 0)
                return nvhost_3dctx_handler_init(h);
@@ -74,15 +76,15 @@ struct hwctx_reginfo {
        unsigned int offset:12;
        unsigned int count:16;
        unsigned int type:2;
+       unsigned int version:2;
 };
 
 enum {
        HWCTX_REGINFO_DIRECT = 0,
        HWCTX_REGINFO_INDIRECT,
-       HWCTX_REGINFO_INDIRECT_OFFSET,
-       HWCTX_REGINFO_INDIRECT_DATA
+       HWCTX_REGINFO_INDIRECT_4X
 };
 
-#define HWCTX_REGINFO(offset, count, type) {offset, count, HWCTX_REGINFO_##type}
+#define HWCTX_REGINFO(version, offset, count, type) {offset, count, HWCTX_REGINFO_##type, version}
 
 #endif
index 8acd098..c191a37 100644 (file)
@@ -181,7 +181,8 @@ static void action_ctxsave(struct nvhost_waitlist *waiter)
        struct nvhost_hwctx *hwctx = waiter->data;
        struct nvhost_channel *channel = hwctx->channel;
 
-       channel->ctxhandler.save_service(hwctx);
+       if (channel->ctxhandler.save_service)
+               channel->ctxhandler.save_service(hwctx);
        channel->ctxhandler.put(hwctx);
 }