gpu: nvgpu: support gk20a virtualization
Aingara Paramakuru [Tue, 6 May 2014 01:14:22 +0000 (21:14 -0400)]
The nvgpu driver can now use the Tegra graphics virtualization
interfaces to run gk20a in a virtualized environment.
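
The bulk of the change routes former direct calls (for example
gk20a_fifo_preempt_channel() and gk20a_vm_alloc_share()) through the
per-chip gpu_ops tables, so a virtual backend can install
implementations that forward the work to the host instead of touching
registers. A minimal, self-contained sketch of that dispatch pattern,
using simplified stand-in types and a made-up init path rather than
the driver's real structures:

    #include <stdio.h>

    struct gk20a;                       /* simplified stand-in, not the real struct */

    struct fifo_ops {
            int (*preempt_channel)(struct gk20a *g, unsigned int hw_chid);
    };

    struct gpu_ops {
            struct fifo_ops fifo;
    };

    struct gk20a {
            struct gpu_ops ops;
            int virtual_dev;            /* mirrors the new platform flag */
    };

    /* native path: the real code programs PFIFO registers here */
    static int native_preempt_channel(struct gk20a *g, unsigned int hw_chid)
    {
            (void)g;
            printf("native: preempt channel %u via registers\n", hw_chid);
            return 0;
    }

    /* vgpu path: the real code sends a preempt request to the host */
    static int vgpu_preempt_channel(struct gk20a *g, unsigned int hw_chid)
    {
            (void)g;
            printf("vgpu: forward preempt of channel %u to the host\n", hw_chid);
            return 0;
    }

    int main(void)
    {
            struct gk20a g = { .virtual_dev = 1 };

            /* HAL init picks the backend once; callers never check the flag */
            g.ops.fifo.preempt_channel = g.virtual_dev ?
                    vgpu_preempt_channel : native_preempt_channel;

            return g.ops.fifo.preempt_channel(&g, 3);
    }

In the real driver the native pointers are installed by
gk20a_init_fifo()/gm20b_init_fifo() and friends, while the vgpu code
adds implementations such as vgpu_fifo_preempt_channel(), so callers
like gk20a_channel_suspend() stay backend-agnostic.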

Bug 1509608

Change-Id: I6ede15ee7bf0b0ad8a13e8eb5f557c3516ead676
Signed-off-by: Aingara Paramakuru <aparamakuru@nvidia.com>
Reviewed-on: http://git-master/r/440122
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>

24 files changed:
drivers/gpu/nvgpu/Makefile
drivers/gpu/nvgpu/gk20a/Makefile
drivers/gpu/nvgpu/gk20a/as_gk20a.c
drivers/gpu/nvgpu/gk20a/channel_gk20a.c
drivers/gpu/nvgpu/gk20a/channel_gk20a.h
drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
drivers/gpu/nvgpu/gk20a/gk20a.c
drivers/gpu/nvgpu/gk20a/gk20a.h
drivers/gpu/nvgpu/gk20a/gr_gk20a.c
drivers/gpu/nvgpu/gk20a/mm_gk20a.c
drivers/gpu/nvgpu/gk20a/mm_gk20a.h
drivers/gpu/nvgpu/gk20a/platform_gk20a.h
drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c [new file with mode: 0644]
drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
drivers/gpu/nvgpu/gm20b/gr_gm20b.c
drivers/gpu/nvgpu/gm20b/mm_gm20b.c
drivers/gpu/nvgpu/vgpu/Makefile [new file with mode: 0644]
drivers/gpu/nvgpu/vgpu/fifo_vgpu.c [new file with mode: 0644]
drivers/gpu/nvgpu/vgpu/gr_vgpu.c [new file with mode: 0644]
drivers/gpu/nvgpu/vgpu/ltc_vgpu.c [new file with mode: 0644]
drivers/gpu/nvgpu/vgpu/mm_vgpu.c [new file with mode: 0644]
drivers/gpu/nvgpu/vgpu/vgpu.c [new file with mode: 0644]
drivers/gpu/nvgpu/vgpu/vgpu.h [new file with mode: 0644]
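
The new vgpu/ files listed above implement their hooks by marshalling
the arguments into a tegra_vgpu_cmd_msg and handing it to
vgpu_comm_sendrecv(), as the fifo_vgpu.c hunks below show. A rough,
self-contained sketch of that request/response shape, using stand-in
names and layouts rather than the actual vgpu ABI:

    #include <stdio.h>
    #include <string.h>

    enum { CMD_CHANNEL_DISABLE = 1 };   /* stand-in for a TEGRA_VGPU_CMD_* value */

    struct channel_config_params {
            unsigned long long handle;  /* host-side channel context (virt_ctx) */
    };

    struct cmd_msg {
            unsigned int cmd;
            int ret;                    /* status filled in by the host */
            unsigned long long handle;  /* per-device virt_handle */
            union {
                    struct channel_config_params channel_config;
            } params;
    };

    /* stand-in for vgpu_comm_sendrecv(): pretend the host accepted the command */
    static int comm_sendrecv(struct cmd_msg *msg, size_t size_in, size_t size_out)
    {
            (void)size_in;
            (void)size_out;
            printf("send cmd %u for ctx 0x%llx\n", msg->cmd,
                   msg->params.channel_config.handle);
            msg->ret = 0;
            return 0;
    }

    int main(void)
    {
            struct cmd_msg msg;
            int err;

            memset(&msg, 0, sizeof(msg));
            msg.cmd = CMD_CHANNEL_DISABLE;
            msg.handle = 0x1234;                        /* virt_handle */
            msg.params.channel_config.handle = 0xabcd;  /* ch->virt_ctx */

            err = comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
            return (err || msg.ret) ? 1 : 0;
    }

The real messages carry the per-device virt_handle from the platform
data plus the per-channel virt_ctx handle that the host returns when
the channel instance is allocated (see vgpu_channel_alloc_inst()).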

index 0fb6090..6544b31 100644
@@ -5,3 +5,4 @@ ccflags-y += -Werror
 
 obj-$(CONFIG_GK20A)    += gk20a/
 obj-$(CONFIG_GK20A)    += gm20b/
+obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += vgpu/
index aa9237b..fbc9cbe 100644
@@ -39,5 +39,6 @@ nvgpu-y := \
        tsg_gk20a.o
 nvgpu-$(CONFIG_TEGRA_GK20A) += platform_gk20a_tegra.o
 nvgpu-$(CONFIG_SYNC) += sync_gk20a.o
+nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += platform_vgpu_tegra.o
 
 obj-$(CONFIG_GK20A) := nvgpu.o
index 4849dbd..1a1ca8f 100644
@@ -40,6 +40,7 @@ static void release_as_share_id(struct gk20a_as *as, int id)
 static int gk20a_as_alloc_share(struct gk20a_as *as,
                                struct gk20a_as_share **out)
 {
+       struct gk20a *g = gk20a_from_as(as);
        struct gk20a_as_share *as_share;
        int err = 0;
 
@@ -55,7 +56,7 @@ static int gk20a_as_alloc_share(struct gk20a_as *as,
        as_share->ref_cnt.counter = 1;
 
        /* this will set as_share->vm. */
-       err = gk20a_vm_alloc_share(as_share);
+       err = g->ops.mm.vm_alloc_share(as_share);
        if (err)
                goto failed;
 
@@ -106,7 +107,7 @@ static int gk20a_as_ioctl_bind_channel(
        atomic_inc(&as_share->ref_cnt);
 
        /* this will set channel_gk20a->vm */
-       err = gk20a_vm_bind_channel(as_share, ch);
+       err = ch->g->ops.mm.vm_bind_channel(as_share, ch);
        if (err) {
                atomic_dec(&as_share->ref_cnt);
                return err;
index 4575788..669ec29 100644
@@ -56,16 +56,9 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
 
 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
-static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
-                       u64 gpfifo_base, u32 gpfifo_entries);
 
 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
 
-static int channel_gk20a_alloc_inst(struct gk20a *g,
-                               struct channel_gk20a *ch);
-static void channel_gk20a_free_inst(struct gk20a *g,
-                               struct channel_gk20a *ch);
-
 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
                                        bool add);
 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
@@ -173,12 +166,10 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
                return -ENOMEM;
 
        /* disable channel */
-       gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
-               gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
-               ccsr_channel_enable_clr_true_f());
+       c->g->ops.fifo.disable_channel(c);
 
        /* preempt the channel */
-       WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
+       WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
 
        /* value field is 8 bits long */
        while (value >= 1 << 8) {
@@ -206,8 +197,8 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
        return 0;
 }
 
-static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
-                               u64 gpfifo_base, u32 gpfifo_entries)
+int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+                       u64 gpfifo_base, u32 gpfifo_entries)
 {
        void *inst_ptr;
 
@@ -269,7 +260,7 @@ static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 
        gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
-       return 0;
+       return channel_gk20a_commit_userd(c);
 }
 
 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
@@ -347,8 +338,7 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
        }
 }
 
-static int channel_gk20a_alloc_inst(struct gk20a *g,
-                               struct channel_gk20a *ch)
+int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
        struct device *d = dev_from_gk20a(g);
        int err = 0;
@@ -384,12 +374,11 @@ static int channel_gk20a_alloc_inst(struct gk20a *g,
 
 clean_up:
        gk20a_err(d, "fail");
-       channel_gk20a_free_inst(g, ch);
+       g->ops.fifo.free_inst(g, ch);
        return err;
 }
 
-static void channel_gk20a_free_inst(struct gk20a *g,
-                               struct channel_gk20a *ch)
+void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
        struct device *d = dev_from_gk20a(g);
 
@@ -403,7 +392,16 @@ static void channel_gk20a_free_inst(struct gk20a *g,
 
 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
 {
-       return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
+       return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
+}
+
+void channel_gk20a_disable(struct channel_gk20a *ch)
+{
+       /* disable channel */
+       gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+               gk20a_readl(ch->g,
+                       ccsr_channel_r(ch->hw_chid)) |
+                       ccsr_channel_enable_clr_true_f());
 }
 
 void gk20a_channel_abort(struct channel_gk20a *ch)
@@ -426,11 +424,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch)
        }
        mutex_unlock(&ch->jobs_lock);
 
-       /* disable channel */
-       gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
-                    gk20a_readl(ch->g,
-                    ccsr_channel_r(ch->hw_chid)) |
-                    ccsr_channel_enable_clr_true_f());
+       ch->g->ops.fifo.disable_channel(ch);
 
        if (released_job_semaphore) {
                wake_up_interruptible_all(&ch->semaphore_wq);
@@ -479,7 +473,7 @@ void gk20a_disable_channel(struct channel_gk20a *ch,
        gk20a_wait_channel_idle(ch);
 
        /* preempt the channel */
-       gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
+       ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
 
        /* remove channel from runlist */
        channel_gk20a_update_runlist(ch, false);
@@ -643,7 +637,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
        gk20a_free_error_notifiers(ch);
 
        /* release channel ctx */
-       gk20a_free_channel_ctx(ch);
+       g->ops.gr.free_channel_ctx(ch);
 
        gk20a_gr_flush_channel_tlb(gr);
 
@@ -683,8 +677,8 @@ unbind:
        if (gk20a_is_channel_marked_as_tsg(ch))
                gk20a_tsg_unbind_channel(ch);
 
-       channel_gk20a_unbind(ch);
-       channel_gk20a_free_inst(g, ch);
+       g->ops.fifo.unbind_channel(ch);
+       g->ops.fifo.free_inst(g, ch);
 
        ch->vpr = false;
        ch->vm = NULL;
@@ -747,7 +741,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 
        ch->g = g;
 
-       if (channel_gk20a_alloc_inst(g, ch)) {
+       if (g->ops.fifo.alloc_inst(g, ch)) {
                ch->in_use = false;
                gk20a_err(dev_from_gk20a(g),
                           "failed to open gk20a channel, out of inst mem");
@@ -1097,7 +1091,6 @@ static void recycle_priv_cmdbuf(struct channel_gk20a *c)
        gk20a_dbg_fn("done");
 }
 
-
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
                               struct nvhost_alloc_gpfifo_args *args)
 {
@@ -1181,10 +1174,11 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
        gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
                c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
 
-       channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
-
        channel_gk20a_setup_userd(c);
-       channel_gk20a_commit_userd(c);
+
+       err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
+       if (err)
+               goto clean_up_unmap;
 
        /* TBD: setup engine contexts */
 
@@ -1550,7 +1544,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        /* We don't know what context is currently running...                */
        /* Note also: there can be more than one context associated with the */
        /* address space (vm).   */
-       gk20a_mm_tlb_invalidate(c->vm);
+       g->ops.mm.tlb_invalidate(c->vm);
 
        /* Make sure we have enough space for gpfifo entries. If not,
         * wait for signals from completed submits */
@@ -1929,7 +1923,7 @@ static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
 
        gk20a_dbg_fn("");
 
-       return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
+       return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
                                args->gpu_va, args->mode);
 }
 
@@ -1945,7 +1939,7 @@ int gk20a_channel_suspend(struct gk20a *g)
        gk20a_dbg_fn("");
 
        /* wait for engine idle */
-       err = gk20a_fifo_wait_engine_idle(g);
+       err = g->ops.fifo.wait_engine_idle(g);
        if (err)
                return err;
 
@@ -1954,22 +1948,20 @@ int gk20a_channel_suspend(struct gk20a *g)
 
                        gk20a_dbg_info("suspend channel %d", chid);
                        /* disable channel */
-                       gk20a_writel(g, ccsr_channel_r(chid),
-                               gk20a_readl(g, ccsr_channel_r(chid)) |
-                               ccsr_channel_enable_clr_true_f());
+                       g->ops.fifo.disable_channel(&f->channel[chid]);
                        /* preempt the channel */
-                       gk20a_fifo_preempt_channel(g, chid);
+                       g->ops.fifo.preempt_channel(g, chid);
 
                        channels_in_use = true;
                }
        }
 
        if (channels_in_use) {
-               gk20a_fifo_update_runlist(g, 0, ~0, false, true);
+               g->ops.fifo.update_runlist(g, 0, ~0, false, true);
 
                for (chid = 0; chid < f->num_channels; chid++) {
                        if (f->channel[chid].in_use)
-                               channel_gk20a_unbind(&f->channel[chid]);
+                               g->ops.fifo.unbind_channel(&f->channel[chid]);
                }
        }
 
@@ -1996,7 +1988,7 @@ int gk20a_channel_resume(struct gk20a *g)
        }
 
        if (channels_in_use)
-               gk20a_fifo_update_runlist(g, 0, ~0, true, true);
+               g->ops.fifo.update_runlist(g, 0, ~0, true, true);
 
        gk20a_dbg_fn("done");
        return 0;
@@ -2074,6 +2066,11 @@ clean_up:
 void gk20a_init_channel(struct gpu_ops *gops)
 {
        gops->fifo.bind_channel = channel_gk20a_bind;
+       gops->fifo.unbind_channel = channel_gk20a_unbind;
+       gops->fifo.disable_channel = channel_gk20a_disable;
+       gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
+       gops->fifo.free_inst = channel_gk20a_free_inst;
+       gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
 }
 
 long gk20a_channel_ioctl(struct file *filp,
@@ -2144,7 +2141,7 @@ long gk20a_channel_ioctl(struct file *filp,
                                __func__, cmd);
                        return err;
                }
-               err = gk20a_alloc_obj_ctx(ch,
+               err = ch->g->ops.gr.alloc_obj_ctx(ch,
                                (struct nvhost_alloc_obj_ctx_args *)buf);
                gk20a_idle(dev);
                break;
@@ -2156,7 +2153,7 @@ long gk20a_channel_ioctl(struct file *filp,
                                __func__, cmd);
                        return err;
                }
-               err = gk20a_free_obj_ctx(ch,
+               err = ch->g->ops.gr.free_obj_ctx(ch,
                                (struct nvhost_free_obj_ctx_args *)buf);
                gk20a_idle(dev);
                break;
index 2ea3ecc..37ca824 100644
@@ -144,6 +144,10 @@ struct channel_gk20a {
        void *error_notifier_va;
 
        struct gk20a_channel_sync *sync;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       u64 virt_ctx;
+#endif
 };
 
 static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
@@ -193,4 +197,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
                               struct nvhost_alloc_gpfifo_args *args);
 
+void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
+void channel_gk20a_disable(struct channel_gk20a *ch);
+int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch);
+void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch);
+int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+                       u64 gpfifo_base, u32 gpfifo_entries);
+
 #endif /*__CHANNEL_GK20A_H__*/
index e5628c3..7338f84 100644
@@ -158,6 +158,9 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
        struct zbc_entry *zbc_val;
        struct zbc_query_params *zbc_tbl;
        int i, err = 0;
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       struct gk20a_platform *platform = platform_get_drvdata(dev);
+#endif
 
        gk20a_dbg_fn("");
 
@@ -197,7 +200,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
                if (zcull_info == NULL)
                        return -ENOMEM;
 
-               err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info);
+               err = g->ops.gr.get_zcull_info(g, &g->gr, zcull_info);
                if (err) {
                        kfree(zcull_info);
                        break;
@@ -219,6 +222,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
        case NVHOST_GPU_IOCTL_ZBC_SET_TABLE:
                set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf;
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+               if (platform->virtual_dev)
+                       return -ENOMEM;
+#endif
+
                zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL);
                if (zbc_val == NULL)
                        return -ENOMEM;
index 4363129..e6b3fd5 100644
@@ -1173,7 +1173,7 @@ void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
 
                gk20a_channel_abort(ch);
                for (i = 0; i < g->fifo.max_runlists; i++)
-                       gk20a_fifo_update_runlist(g, i,
+                       g->ops.fifo.update_runlist(g, i,
                                        hw_chid, false, false);
 
                if (gk20a_fifo_set_ctx_mmu_error(g, ch))
@@ -1620,7 +1620,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g,
                pbdma_chid = fifo_pbdma_status_next_id_v(pbdma_stat);
 
        if (pbdma_chid != ~0) {
-               err = gk20a_fifo_preempt_channel(g, pbdma_chid);
+               err = g->ops.fifo.preempt_channel(g, pbdma_chid);
                if (err)
                        goto clean_up;
        }
@@ -1636,7 +1636,7 @@ int gk20a_fifo_disable_engine_activity(struct gk20a *g,
                engine_chid = fifo_engine_status_next_id_v(eng_stat);
 
        if (engine_chid != ~0 && engine_chid != pbdma_chid) {
-               err = gk20a_fifo_preempt_channel(g, engine_chid);
+               err = g->ops.fifo.preempt_channel(g, engine_chid);
                if (err)
                        goto clean_up;
        }
@@ -1960,6 +1960,9 @@ static void gk20a_fifo_apply_pb_timeout(struct gk20a *g)
 void gk20a_init_fifo(struct gpu_ops *gops)
 {
        gk20a_init_channel(gops);
+       gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
+       gops->fifo.update_runlist = gk20a_fifo_update_runlist;
        gops->fifo.trigger_mmu_fault = gk20a_fifo_trigger_mmu_fault;
        gops->fifo.apply_pb_timeout = gk20a_fifo_apply_pb_timeout;
+       gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 }
index c2c1615..d0c2510 100644
@@ -43,6 +43,7 @@
 #include <linux/tegra-powergate.h>
 #include <linux/tegra_pm_domains.h>
 #include <linux/clk/tegra.h>
+#include <linux/kthread.h>
 
 #include <linux/sched.h>
 #include <linux/input-cfboost.h>
@@ -61,6 +62,9 @@
 #include "dbg_gpu_gk20a.h"
 #include "hal.h"
 #include "nvhost_acm.h"
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+#include "vgpu/vgpu.h"
+#endif
 
 #ifdef CONFIG_ARM64
 #define __cpuc_flush_dcache_area __flush_dcache_area
@@ -738,6 +742,17 @@ static int gk20a_init_client(struct platform_device *dev)
 
        gk20a_dbg_fn("");
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       {
+               struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+               if (platform->virtual_dev) {
+                       err = vgpu_pm_finalize_poweron(&dev->dev);
+                       if (err)
+                               return err;
+               }
+       }
+#endif
 #ifndef CONFIG_PM_RUNTIME
        gk20a_pm_finalize_poweron(&dev->dev);
 #endif
@@ -754,6 +769,16 @@ static int gk20a_init_client(struct platform_device *dev)
 static void gk20a_deinit_client(struct platform_device *dev)
 {
        gk20a_dbg_fn("");
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       {
+               struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+               if (platform->virtual_dev) {
+                       vgpu_pm_prepare_poweroff(&dev->dev);
+                       return;
+               }
+       }
+#endif
 #ifndef CONFIG_PM_RUNTIME
        gk20a_pm_prepare_poweroff(&dev->dev);
 #endif
@@ -1007,6 +1032,10 @@ static struct of_device_id tegra_gk20a_of_match[] = {
                .data = &gk20a_tegra_platform },
        { .compatible = "nvidia,tegra210-gm20b",
                .data = &gm20b_tegra_platform },
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       { .compatible = "nvidia,tegra124-gk20a-vgpu",
+               .data = &vgpu_tegra_platform },
+#endif
 #else
        { .compatible = "nvidia,tegra124-gk20a",
                .data = &gk20a_generic_platform },
@@ -1058,7 +1087,7 @@ static int gk20a_create_device(
        return 0;
 }
 
-static void gk20a_user_deinit(struct platform_device *dev)
+void gk20a_user_deinit(struct platform_device *dev)
 {
        struct gk20a *g = get_gk20a(dev);
 
@@ -1099,7 +1128,7 @@ static void gk20a_user_deinit(struct platform_device *dev)
                class_destroy(g->class);
 }
 
-static int gk20a_user_init(struct platform_device *dev)
+int gk20a_user_init(struct platform_device *dev)
 {
        int err;
        dev_t devno;
@@ -1404,6 +1433,11 @@ static int gk20a_probe(struct platform_device *dev)
 
        platform_set_drvdata(dev, platform);
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       if (platform->virtual_dev)
+               return vgpu_probe(dev);
+#endif
+
        gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
        if (!gk20a) {
                dev_err(&dev->dev, "couldn't allocate gk20a support");
@@ -1547,8 +1581,16 @@ static int gk20a_probe(struct platform_device *dev)
 static int __exit gk20a_remove(struct platform_device *dev)
 {
        struct gk20a *g = get_gk20a(dev);
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       struct gk20a_platform *platform = gk20a_get_platform(dev);
+#endif
        gk20a_dbg_fn("");
 
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       if (platform->virtual_dev)
+               return vgpu_remove(dev);
+#endif
+
 #ifdef CONFIG_INPUT_CFBOOST
        if (g->boost_added)
                cfb_remove_device(&dev->dev);
index a1080f0..b813541 100644
@@ -131,6 +131,16 @@ struct gpu_ops {
                                u32 reg_offset);
                int (*load_ctxsw_ucode)(struct gk20a *g);
                u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
+               void (*free_channel_ctx)(struct channel_gk20a *c);
+               int (*alloc_obj_ctx)(struct channel_gk20a  *c,
+                               struct nvhost_alloc_obj_ctx_args *args);
+               int (*free_obj_ctx)(struct channel_gk20a  *c,
+                               struct nvhost_free_obj_ctx_args *args);
+               int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr,
+                               struct channel_gk20a *c, u64 zcull_va,
+                               u32 mode);
+               int (*get_zcull_info)(struct gk20a *g, struct gr_gk20a *gr,
+                               struct gr_zcull_info *zcull_params);
        } gr;
        const char *name;
        struct {
@@ -148,9 +158,20 @@ struct gpu_ops {
        } clock_gating;
        struct {
                void (*bind_channel)(struct channel_gk20a *ch_gk20a);
+               void (*unbind_channel)(struct channel_gk20a *ch_gk20a);
+               void (*disable_channel)(struct channel_gk20a *ch);
+               int (*alloc_inst)(struct gk20a *g, struct channel_gk20a *ch);
+               void (*free_inst)(struct gk20a *g, struct channel_gk20a *ch);
+               int (*setup_ramfc)(struct channel_gk20a *c, u64 gpfifo_base,
+                               u32 gpfifo_entries);
+               int (*preempt_channel)(struct gk20a *g, u32 hw_chid);
+               int (*update_runlist)(struct gk20a *g, u32 runlist_id,
+                               u32 hw_chid, bool add,
+                               bool wait_for_finish);
                void (*trigger_mmu_fault)(struct gk20a *g,
                                unsigned long engine_ids);
                void (*apply_pb_timeout)(struct gk20a *g);
+               int (*wait_engine_idle)(struct gk20a *g);
        } fifo;
        struct pmu_v {
                /*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -241,6 +262,31 @@ struct gpu_ops {
                void (*clear_sparse)(struct vm_gk20a *vm, u64 vaddr,
                               u64 size, u32 pgsz_idx);
                bool (*is_debug_mode_enabled)(struct gk20a *g);
+               u64 (*gmmu_map)(struct vm_gk20a *vm,
+                               u64 map_offset,
+                               struct sg_table *sgt,
+                               u64 buffer_offset,
+                               u64 size,
+                               int pgsz_idx,
+                               u8 kind_v,
+                               u32 ctag_offset,
+                               u32 flags,
+                               int rw_flag,
+                               bool clear_ctags);
+               void (*gmmu_unmap)(struct vm_gk20a *vm,
+                               u64 vaddr,
+                               u64 size,
+                               int pgsz_idx,
+                               bool va_allocated,
+                               int rw_flag);
+               void (*vm_remove)(struct vm_gk20a *vm);
+               int (*vm_alloc_share)(struct gk20a_as_share *as_share);
+               int (*vm_bind_channel)(struct gk20a_as_share *as_share,
+                               struct channel_gk20a *ch);
+               int (*fb_flush)(struct gk20a *g);
+               void (*l2_invalidate)(struct gk20a *g);
+               void (*l2_flush)(struct gk20a *g, bool invalidate);
+               void (*tlb_invalidate)(struct vm_gk20a *vm);
        } mm;
        struct {
                int (*prepare_ucode)(struct gk20a *g);
@@ -648,4 +694,7 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name);
 
 int gk20a_init_gpu_characteristics(struct gk20a *g);
 
+int gk20a_user_init(struct platform_device *dev);
+void gk20a_user_deinit(struct platform_device *dev);
+
 #endif /* _NVHOST_GK20A_H_ */
index ef7776d..892a138 100644
@@ -825,7 +825,7 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
                }
        }
 
-       gk20a_mm_fb_flush(g);
+       g->ops.mm.fb_flush(g);
 
        gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
                 ch_ctx->zcull_ctx.ctx_sw_mode);
@@ -7077,4 +7077,9 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
        gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments;
        gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
        gops->gr.get_gpc_tpc_mask = gr_gk20a_get_gpc_tpc_mask;
+       gops->gr.free_channel_ctx = gk20a_free_channel_ctx;
+       gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx;
+       gops->gr.free_obj_ctx = gk20a_free_obj_ctx;
+       gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull;
+       gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 }
index 654938b..3feb675 100644
@@ -88,7 +88,6 @@ static inline u32 lo32(u64 f)
        return (u32)(f & 0xffffffff);
 }
 
-static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
 static struct mapped_buffer_node *find_mapped_buffer_locked(
                                        struct rb_root *root, u64 addr);
 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
@@ -100,7 +99,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
                                   u64 first_vaddr, u64 last_vaddr,
                                   u8 kind_v, u32 ctag_offset, bool cacheable,
                                   int rw_flag);
-static void gk20a_vm_remove_support(struct vm_gk20a *vm);
 static int gk20a_init_system_vm(struct mm_gk20a *mm);
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm);
 
@@ -335,6 +333,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
        gk20a_init_bar1_vm(mm);
        gk20a_init_system_vm(mm);
 
+       /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */
+       g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share;
        mm->remove_support = gk20a_remove_mm_support;
        mm->sw_ready = true;
 
@@ -833,9 +833,9 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
        mutex_unlock(&vm->update_gmmu_lock);
 }
 
-static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
-                            u64 size,
-                            enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
+u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
+                    u64 size,
+                    enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
        struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
@@ -881,9 +881,9 @@ static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
        return offset;
 }
 
-static int gk20a_vm_free_va(struct vm_gk20a *vm,
-                            u64 offset, u64 size,
-                            enum gmmu_pgsz_gk20a pgsz_idx)
+int gk20a_vm_free_va(struct vm_gk20a *vm,
+                    u64 offset, u64 size,
+                    enum gmmu_pgsz_gk20a pgsz_idx)
 {
        struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
        u32 page_size = gmmu_page_sizes[pgsz_idx];
@@ -1100,21 +1100,32 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
        return 0;
 }
 
-static u64 __locked_gmmu_map(struct vm_gk20a *vm,
-                               u64 map_offset,
-                               struct sg_table *sgt,
-                               u64 buffer_offset,
-                               u64 size,
-                               int pgsz_idx,
-                               u8 kind_v,
-                               u32 ctag_offset,
-                               u32 flags,
-                               int rw_flag)
+u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
+                       u64 map_offset,
+                       struct sg_table *sgt,
+                       u64 buffer_offset,
+                       u64 size,
+                       int pgsz_idx,
+                       u8 kind_v,
+                       u32 ctag_offset,
+                       u32 flags,
+                       int rw_flag,
+                       bool clear_ctags)
 {
        int err = 0, i = 0;
        bool allocated = false;
        u32 pde_lo, pde_hi;
        struct device *d = dev_from_vm(vm);
+       struct gk20a *g = gk20a_from_vm(vm);
+
+       if (clear_ctags && ctag_offset) {
+               u32 ctag_lines = ALIGN(size, COMP_TAG_LINE_SIZE) >>
+                                       COMP_TAG_LINE_SIZE_SHIFT;
+
+               /* init/clear the ctag buffer */
+               g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+                               ctag_offset, ctag_offset + ctag_lines - 1);
+       }
 
        /* Allocate (or validate when map_offset != 0) the virtual address. */
        if (!map_offset) {
@@ -1167,12 +1178,12 @@ fail_alloc:
        return 0;
 }
 
-static void __locked_gmmu_unmap(struct vm_gk20a *vm,
-                               u64 vaddr,
-                               u64 size,
-                               int pgsz_idx,
-                               bool va_allocated,
-                               int rw_flag)
+void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
+                       u64 vaddr,
+                       u64 size,
+                       int pgsz_idx,
+                       bool va_allocated,
+                       int rw_flag)
 {
        int err = 0;
        struct gk20a *g = gk20a_from_vm(vm);
@@ -1298,6 +1309,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
        struct buffer_attrs bfr = {0};
        struct gk20a_comptags comptags;
        u64 buf_addr;
+       bool clear_ctags = false;
 
        mutex_lock(&vm->update_gmmu_lock);
 
@@ -1402,11 +1414,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
                        bfr.kind_v = bfr.uc_kind_v;
                } else {
                        gk20a_get_comptags(d, dmabuf, &comptags);
-
-                       /* init/clear the ctag buffer */
-                       g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-                                         comptags.offset,
-                                         comptags.offset + comptags.lines - 1);
+                       clear_ctags = true;
                }
        }
 
@@ -1414,15 +1422,15 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
        bfr.ctag_offset = comptags.offset;
 
        /* update gmmu ptes */
-       map_offset = __locked_gmmu_map(vm, map_offset,
+       map_offset = g->ops.mm.gmmu_map(vm, map_offset,
                                        bfr.sgt,
                                        buffer_offset, /* sg offset */
                                        mapping_size,
                                        bfr.pgsz_idx,
                                        bfr.kind_v,
                                        bfr.ctag_offset,
-                                       flags, rw_flag);
-
+                                       flags, rw_flag,
+                                       clear_ctags);
        if (!map_offset)
                goto clean_up;
 
@@ -1531,17 +1539,18 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
                u32 flags,
                int rw_flag)
 {
+       struct gk20a *g = gk20a_from_vm(vm);
        u64 vaddr;
 
        mutex_lock(&vm->update_gmmu_lock);
-       vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
+       vaddr = g->ops.mm.gmmu_map(vm, 0, /* already mapped? - No */
                                *sgt, /* sg table */
                                0, /* sg offset */
                                size,
                                0, /* page size index = 0 i.e. SZ_4K */
                                0, /* kind */
                                0, /* ctag_offset */
-                               flags, rw_flag);
+                               flags, rw_flag, false);
        mutex_unlock(&vm->update_gmmu_lock);
        if (!vaddr) {
                gk20a_err(dev_from_vm(vm), "failed to allocate va space");
@@ -1549,7 +1558,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
        }
 
        /* Invalidate kernel mappings immediately */
-       gk20a_mm_tlb_invalidate(vm);
+       g->ops.mm.tlb_invalidate(vm);
 
        return vaddr;
 }
@@ -1573,8 +1582,10 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
                u64 size,
                int rw_flag)
 {
+       struct gk20a *g = gk20a_from_vm(vm);
+
        mutex_lock(&vm->update_gmmu_lock);
-       __locked_gmmu_unmap(vm,
+       g->ops.mm.gmmu_unmap(vm,
                        vaddr,
                        size,
                        0, /* page size 4K */
@@ -1970,10 +1981,10 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
        }
 
        for (i = 0; i < num_pages; i++) {
-               u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
+               u64 page_vaddr = g->ops.mm.gmmu_map(vm, vaddr,
                        vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
                        NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
-                       gk20a_mem_flag_none);
+                       gk20a_mem_flag_none, false);
 
                if (!page_vaddr) {
                        gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
@@ -1990,7 +2001,7 @@ err_unmap:
        /* something went wrong. unmap pages */
        while (i--) {
                vaddr -= pgsz;
-               __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
+               g->ops.mm.gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
                                    gk20a_mem_flag_none);
        }
 
@@ -2005,12 +2016,14 @@ static int gk20a_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 
 void gk20a_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
                               u64 size, u32 pgsz_idx) {
-       __locked_gmmu_unmap(vm, vaddr, size, pgsz_idx,
-                               false, gk20a_mem_flag_none);
+       struct gk20a *g = vm->mm->g;
+
+       g->ops.mm.gmmu_unmap(vm, vaddr, size, pgsz_idx,
+                       false, gk20a_mem_flag_none);
 }
 
 /* NOTE! mapped_buffers lock must be held */
-static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
        struct vm_gk20a *vm = mapped_buffer->vm;
        struct gk20a *g = vm->mm->g;
@@ -2026,7 +2039,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
                if (g->ops.mm.put_empty) {
                        g->ops.mm.put_empty(vm, vaddr, num_pages, pgsz_idx);
                } else {
-                       __locked_gmmu_unmap(vm,
+                       g->ops.mm.gmmu_unmap(vm,
                                mapped_buffer->addr,
                                mapped_buffer->size,
                                mapped_buffer->pgsz_idx,
@@ -2036,7 +2049,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
                                        num_pages, pgsz_idx, false);
                }
        } else
-               __locked_gmmu_unmap(vm,
+               g->ops.mm.gmmu_unmap(vm,
                                mapped_buffer->addr,
                                mapped_buffer->size,
                                mapped_buffer->pgsz_idx,
@@ -2085,7 +2098,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
        mutex_unlock(&vm->update_gmmu_lock);
 }
 
-static void gk20a_vm_remove_support(struct vm_gk20a *vm)
+void gk20a_vm_remove_support(struct vm_gk20a *vm)
 {
        struct gk20a *g = vm->mm->g;
        struct mapped_buffer_node *mapped_buffer;
@@ -2156,7 +2169,8 @@ static void gk20a_vm_remove_support(struct vm_gk20a *vm)
 static void gk20a_vm_remove_support_kref(struct kref *ref)
 {
        struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
-       gk20a_vm_remove_support(vm);
+       struct gk20a *g = gk20a_from_vm(vm);
+       g->ops.mm.vm_remove(vm);
 }
 
 void gk20a_vm_get(struct vm_gk20a *vm)
@@ -3124,5 +3138,14 @@ void gk20a_init_mm(struct gpu_ops *gops)
        gops->mm.put_empty = gk20a_vm_put_empty;
        gops->mm.clear_sparse = gk20a_vm_clear_sparse;
        gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
+       gops->mm.gmmu_map = gk20a_locked_gmmu_map;
+       gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
+       gops->mm.vm_remove = gk20a_vm_remove_support;
+       gops->mm.vm_alloc_share = gk20a_vm_alloc_share;
+       gops->mm.vm_bind_channel = gk20a_vm_bind_channel;
+       gops->mm.fb_flush = gk20a_mm_fb_flush;
+       gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
+       gops->mm.l2_flush = gk20a_mm_l2_flush;
+       gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 }
 
index b8726c6..f06c465 100644
@@ -318,6 +318,10 @@ struct vm_gk20a {
        dma_addr_t zero_page_iova;
        void *zero_page_cpuva;
        struct sg_table *zero_page_sgt;
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       u64 handle;
+#endif
 };
 
 struct gk20a;
@@ -438,11 +442,30 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
                u32 flags,
                int rw_flag);
 
+u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
+                       u64 map_offset,
+                       struct sg_table *sgt,
+                       u64 buffer_offset,
+                       u64 size,
+                       int pgsz_idx,
+                       u8 kind_v,
+                       u32 ctag_offset,
+                       u32 flags,
+                       int rw_flag,
+                       bool clear_ctags);
+
 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
                u64 vaddr,
                u64 size,
                int rw_flag);
 
+void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
+                       u64 vaddr,
+                       u64 size,
+                       int pgsz_idx,
+                       bool va_allocated,
+                       int rw_flag);
+
 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
                    struct sg_table *sgt);
@@ -461,6 +484,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
+
 /* get reference to all currently mapped buffers */
 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
                         struct mapped_buffer_node ***mapped_buffers,
@@ -482,6 +507,16 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 void gk20a_vm_get(struct vm_gk20a *vm);
 void gk20a_vm_put(struct vm_gk20a *vm);
 
+void gk20a_vm_remove_support(struct vm_gk20a *vm);
+
+u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
+                    u64 size,
+                    enum gmmu_pgsz_gk20a gmmu_pgsz_idx);
+
+int gk20a_vm_free_va(struct vm_gk20a *vm,
+                    u64 offset, u64 size,
+                    enum gmmu_pgsz_gk20a pgsz_idx);
+
 /* vm-as interface */
 struct nvhost_as_alloc_space_args;
 struct nvhost_as_free_space_args;
index 6dd0c0d..e6ed989 100644
@@ -151,6 +151,12 @@ struct gk20a_platform {
         * of the CPU.
         */
        void (*dump_platform_dependencies)(struct platform_device *dev);
+
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+       bool virtual_dev;
+       u64 virt_handle;
+       struct task_struct *intr_handler;
+#endif
 };
 
 static inline struct gk20a_platform *gk20a_get_platform(
@@ -163,6 +169,9 @@ extern struct gk20a_platform gk20a_generic_platform;
 #ifdef CONFIG_TEGRA_GK20A
 extern struct gk20a_platform gk20a_tegra_platform;
 extern struct gk20a_platform gm20b_tegra_platform;
+#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
+extern struct gk20a_platform vgpu_tegra_platform;
+#endif
 #endif
 
 static inline bool gk20a_platform_has_syncpoints(struct platform_device *dev)
diff --git a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
new file mode 0644
index 0000000..ea4fde7
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Tegra Virtualized GPU Platform Interface
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/of_platform.h>
+
+#include "gk20a.h"
+#include "hal_gk20a.h"
+#include "platform_gk20a.h"
+
+static int gk20a_tegra_probe(struct platform_device *dev)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(dev);
+       struct device_node *np = dev->dev.of_node;
+       const __be32 *host1x_ptr;
+       struct platform_device *host1x_pdev = NULL;
+
+       host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
+       if (host1x_ptr) {
+               struct device_node *host1x_node =
+                       of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
+
+               host1x_pdev = of_find_device_by_node(host1x_node);
+               if (!host1x_pdev) {
+                       dev_warn(&dev->dev, "host1x device not available");
+                       return -EPROBE_DEFER;
+               }
+
+       } else {
+               host1x_pdev = to_platform_device(dev->dev.parent);
+               dev_warn(&dev->dev, "host1x reference not found. assuming host1x to be parent");
+       }
+
+       platform->g->host1x_dev = host1x_pdev;
+
+       return 0;
+}
+
+struct gk20a_platform vgpu_tegra_platform = {
+       .has_syncpoints = true,
+
+       /* power management configuration */
+       .can_railgate           = false,
+       .enable_slcg            = false,
+       .enable_blcg            = false,
+       .enable_elcg            = false,
+       .enable_elpg            = false,
+       .enable_aelpg           = false,
+
+       .probe = gk20a_tegra_probe,
+
+       .virtual_dev = true,
+};
index 7e58013..86d049c 100644
@@ -102,5 +102,14 @@ static void gm20b_fifo_trigger_mmu_fault(struct gk20a *g,
 void gm20b_init_fifo(struct gpu_ops *gops)
 {
        gops->fifo.bind_channel = channel_gm20b_bind;
+       gops->fifo.unbind_channel = channel_gk20a_unbind;
+       gops->fifo.disable_channel = channel_gk20a_disable;
+       gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
+       gops->fifo.free_inst = channel_gk20a_free_inst;
+       gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
+
+       gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;
+       gops->fifo.update_runlist = gk20a_fifo_update_runlist;
        gops->fifo.trigger_mmu_fault = gm20b_fifo_trigger_mmu_fault;
+       gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 }
index 72500b0..c9c32b9 100644
@@ -751,4 +751,9 @@ void gm20b_init_gr(struct gpu_ops *gops)
        gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;
 #endif
        gops->gr.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask;
+       gops->gr.free_channel_ctx = gk20a_free_channel_ctx;
+       gops->gr.alloc_obj_ctx = gk20a_alloc_obj_ctx;
+       gops->gr.free_obj_ctx = gk20a_free_obj_ctx;
+       gops->gr.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull;
+       gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 }
index ac82d56..ed5b5e0 100644
@@ -327,4 +327,13 @@ void gm20b_init_mm(struct gpu_ops *gops)
        gops->mm.set_sparse = gm20b_vm_put_sparse;
        gops->mm.clear_sparse = gm20b_vm_clear_sparse;
        gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled;
+       gops->mm.gmmu_map = gk20a_locked_gmmu_map;
+       gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
+       gops->mm.vm_remove = gk20a_vm_remove_support;
+       gops->mm.vm_alloc_share = gk20a_vm_alloc_share;
+       gops->mm.vm_bind_channel = gk20a_vm_bind_channel;
+       gops->mm.fb_flush = gk20a_mm_fb_flush;
+       gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
+       gops->mm.l2_flush = gk20a_mm_l2_flush;
+       gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/Makefile b/drivers/gpu/nvgpu/vgpu/Makefile
new file mode 0644
index 0000000..edad717
--- /dev/null
@@ -0,0 +1,10 @@
+GCOV_PROFILE := y
+ccflags-y += -Idrivers/gpu/nvgpu
+ccflags-y += -Wno-multichar
+
+obj-$(CONFIG_TEGRA_GR_VIRTUALIZATION)  = \
+       ltc_vgpu.o \
+       gr_vgpu.o \
+       fifo_vgpu.o \
+       mm_vgpu.o \
+       vgpu.o
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
new file mode 0644
index 0000000..23dec1f
--- /dev/null
@@ -0,0 +1,569 @@
+/*
+ * Virtualized GPU Fifo
+ *
+ * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/dma-mapping.h>
+#include "vgpu/vgpu.h"
+#include "gk20a/hw_fifo_gk20a.h"
+#include "gk20a/hw_ram_gk20a.h"
+
+static void vgpu_channel_bind(struct channel_gk20a *ch)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_channel_config_params *p =
+                       &msg.params.channel_config;
+       int err;
+
+       gk20a_dbg_info("bind channel %d", ch->hw_chid);
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND;
+       msg.handle = platform->virt_handle;
+       p->handle = ch->virt_ctx;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       WARN_ON(err || msg.ret);
+
+       ch->bound = true;
+}
+
+static void vgpu_channel_unbind(struct channel_gk20a *ch)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+
+       gk20a_dbg_fn("");
+
+       if (ch->bound) {
+               struct tegra_vgpu_cmd_msg msg;
+               struct tegra_vgpu_channel_config_params *p =
+                               &msg.params.channel_config;
+               int err;
+
+               msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND;
+               msg.handle = platform->virt_handle;
+               p->handle = ch->virt_ctx;
+               err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+               WARN_ON(err || msg.ret);
+       }
+
+       ch->bound = false;
+
+       /*
+        * if we are aggressive then we can destroy the syncpt
+        * resource at this point
+        * if not, then it will be destroyed at channel_free()
+        */
+       if (ch->sync && ch->sync->aggressive_destroy) {
+               ch->sync->destroy(ch->sync);
+               ch->sync = NULL;
+       }
+}
+
+static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX;
+       msg.handle = platform->virt_handle;
+       p->id = ch->hw_chid;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       if (err || msg.ret) {
+               gk20a_err(dev_from_gk20a(g), "fail");
+               return -ENOMEM;
+       }
+
+       ch->virt_ctx = p->handle;
+       gk20a_dbg_fn("done");
+       return 0;
+}
+
+static void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX;
+       msg.handle = platform->virt_handle;
+       p->handle = ch->virt_ctx;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       WARN_ON(err || msg.ret);
+}
+
+static void vgpu_channel_disable(struct channel_gk20a *ch)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_channel_config_params *p =
+                       &msg.params.channel_config;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE;
+       msg.handle = platform->virt_handle;
+       p->handle = ch->virt_ctx;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       WARN_ON(err || msg.ret);
+}
+
+static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
+                               u32 gpfifo_entries)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+       struct device __maybe_unused *d = dev_from_gk20a(ch->g);
+       struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC;
+       msg.handle = platform->virt_handle;
+       p->handle = ch->virt_ctx;
+       p->gpfifo_va = gpfifo_base;
+       p->num_entries = gpfifo_entries;
+       p->userd_addr = ch->userd_iova;
+       p->iova = mapping ? 1 : 0;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       return (err || msg.ret) ? -ENOMEM : 0;
+}
+
+static int init_engine_info(struct fifo_gk20a *f)
+{
+       struct fifo_engine_info_gk20a *gr_info;
+       const u32 gr_sw_id = ENGINE_GR_GK20A;
+
+       gk20a_dbg_fn("");
+
+       /* all we really care about finding is the graphics entry    */
+       /* especially early on in sim it probably thinks it has more */
+       f->num_engines = 1;
+
+       gr_info = f->engine_info + gr_sw_id;
+
+       gr_info->sw_id = gr_sw_id;
+       gr_info->name = "gr";
+       /* FIXME: retrieve this from server */
+       gr_info->runlist_id = 0;
+       return 0;
+}
+
+static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
+{
+       struct fifo_engine_info_gk20a *engine_info;
+       struct fifo_runlist_info_gk20a *runlist;
+       struct device *d = dev_from_gk20a(g);
+       u32 runlist_id;
+       u32 i;
+       u64 runlist_size;
+
+       gk20a_dbg_fn("");
+
+       f->max_runlists = fifo_eng_runlist_base__size_1_v();
+       f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
+                                 f->max_runlists, GFP_KERNEL);
+       if (!f->runlist_info)
+               goto clean_up;
+
+       engine_info = f->engine_info + ENGINE_GR_GK20A;
+       runlist_id = engine_info->runlist_id;
+       runlist = &f->runlist_info[runlist_id];
+
+       runlist->active_channels =
+               kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
+                       GFP_KERNEL);
+       if (!runlist->active_channels)
+               goto clean_up_runlist_info;
+
+       runlist_size  = sizeof(u16) * f->num_channels;
+       for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+               dma_addr_t iova;
+
+               runlist->mem[i].cpuva =
+                       dma_alloc_coherent(d,
+                                       runlist_size,
+                                       &iova,
+                                       GFP_KERNEL);
+               if (!runlist->mem[i].cpuva) {
+                       dev_err(d, "memory allocation failed\n");
+                       goto clean_up_runlist;
+               }
+               runlist->mem[i].iova = iova;
+               runlist->mem[i].size = runlist_size;
+       }
+       mutex_init(&runlist->mutex);
+       init_waitqueue_head(&runlist->runlist_wq);
+
+       /* None of the buffers is pinned if this value doesn't change.
+           Otherwise, one of them (cur_buffer) must have been pinned. */
+       runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
+
+       gk20a_dbg_fn("done");
+       return 0;
+
+clean_up_runlist:
+       for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
+               if (runlist->mem[i].cpuva)
+                       dma_free_coherent(d,
+                               runlist->mem[i].size,
+                               runlist->mem[i].cpuva,
+                               runlist->mem[i].iova);
+               runlist->mem[i].cpuva = NULL;
+               runlist->mem[i].iova = 0;
+       }
+
+       kfree(runlist->active_channels);
+       runlist->active_channels = NULL;
+
+clean_up_runlist_info:
+       kfree(f->runlist_info);
+       f->runlist_info = NULL;
+
+clean_up:
+       gk20a_dbg_fn("fail");
+       return -ENOMEM;
+}
+
+static int vgpu_init_fifo_setup_sw(struct gk20a *g)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct fifo_gk20a *f = &g->fifo;
+       struct device *d = dev_from_gk20a(g);
+       int chid, err = 0;
+       dma_addr_t iova;
+
+       gk20a_dbg_fn("");
+
+       if (f->sw_ready) {
+               gk20a_dbg_fn("skip init");
+               return 0;
+       }
+
+       f->g = g;
+
+       err = vgpu_get_attribute(platform->virt_handle,
+                               TEGRA_VGPU_ATTRIB_NUM_CHANNELS,
+                               &f->num_channels);
+       if (err)
+               return -ENXIO;
+
+       f->max_engines = ENGINE_INVAL_GK20A;
+
+       f->userd_entry_size = 1 << ram_userd_base_shift_v();
+       f->userd_total_size = f->userd_entry_size * f->num_channels;
+
+       f->userd.cpuva = dma_alloc_coherent(d,
+                                       f->userd_total_size,
+                                       &iova,
+                                       GFP_KERNEL);
+       if (!f->userd.cpuva) {
+               dev_err(d, "memory allocation failed\n");
+               goto clean_up;
+       }
+
+       f->userd.iova = iova;
+       err = gk20a_get_sgtable(d, &f->userd.sgt,
+                               f->userd.cpuva, f->userd.iova,
+                               f->userd_total_size);
+       if (err) {
+               dev_err(d, "failed to create sg table\n");
+               goto clean_up;
+       }
+
+       /* bar1 va */
+       f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size);
+       if (!f->userd.gpu_va) {
+               dev_err(d, "gmmu mapping failed\n");
+               goto clean_up;
+       }
+
+       gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);
+
+       f->userd.size = f->userd_total_size;
+
+       f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
+                               GFP_KERNEL);
+       f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
+                               GFP_KERNEL);
+
+       if (!(f->channel && f->engine_info)) {
+               err = -ENOMEM;
+               goto clean_up;
+       }
+
+       init_engine_info(f);
+
+       init_runlist(g, f);
+
+       for (chid = 0; chid < f->num_channels; chid++) {
+               f->channel[chid].userd_cpu_va =
+                       f->userd.cpuva + chid * f->userd_entry_size;
+               f->channel[chid].userd_iova =
+                       NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
+                               + chid * f->userd_entry_size;
+               f->channel[chid].userd_gpu_va =
+                       f->userd.gpu_va + chid * f->userd_entry_size;
+
+               gk20a_init_channel_support(g, chid);
+       }
+       mutex_init(&f->ch_inuse_mutex);
+
+       f->deferred_reset_pending = false;
+       mutex_init(&f->deferred_reset_mutex);
+
+       f->sw_ready = true;
+
+       gk20a_dbg_fn("done");
+       return 0;
+
+clean_up:
+       gk20a_dbg_fn("fail");
+       /* FIXME: unmap from bar1 */
+       if (f->userd.sgt)
+               gk20a_free_sgtable(&f->userd.sgt);
+       if (f->userd.cpuva)
+               dma_free_coherent(d,
+                               f->userd_total_size,
+                               f->userd.cpuva,
+                               f->userd.iova);
+       f->userd.cpuva = NULL;
+       f->userd.iova = 0;
+
+       memset(&f->userd, 0, sizeof(struct userd_desc));
+
+       kfree(f->channel);
+       f->channel = NULL;
+       kfree(f->engine_info);
+       f->engine_info = NULL;
+
+       return err;
+}
+
+static int vgpu_init_fifo_setup_hw(struct gk20a *g)
+{
+       gk20a_dbg_fn("");
+
+       /* test write, read through bar1 @ userd region before
+        * turning on the snooping */
+       {
+               struct fifo_gk20a *f = &g->fifo;
+               u32 v, v1 = 0x33, v2 = 0x55;
+
+               u32 bar1_vaddr = f->userd.gpu_va;
+               volatile u32 *cpu_vaddr = f->userd.cpuva;
+
+               gk20a_dbg_info("test bar1 @ vaddr 0x%x",
+                          bar1_vaddr);
+
+               v = gk20a_bar1_readl(g, bar1_vaddr);
+
+               *cpu_vaddr = v1;
+               smp_mb();
+
+               if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
+                       gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
+                       return -EINVAL;
+               }
+
+               gk20a_bar1_writel(g, bar1_vaddr, v2);
+
+               if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
+                       gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
+                       return -EINVAL;
+               }
+
+               /* is it visible to the cpu? */
+               if (*cpu_vaddr != v2) {
+                       gk20a_err(dev_from_gk20a(g),
+                               "cpu didn't see bar1 write @ %p!",
+                               cpu_vaddr);
+               }
+
+               /* put it back */
+               gk20a_bar1_writel(g, bar1_vaddr, v);
+       }
+
+       gk20a_dbg_fn("done");
+
+       return 0;
+}
+
+int vgpu_init_fifo_support(struct gk20a *g)
+{
+       int err;
+
+       gk20a_dbg_fn("");
+
+       err = vgpu_init_fifo_setup_sw(g);
+       if (err)
+               return err;
+
+       err = vgpu_init_fifo_setup_hw(g);
+       return err;
+}
+
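+/* Channel preemption is delegated to the server via a
+ * TEGRA_VGPU_CMD_CHANNEL_PREEMPT request on the command queue. */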
+static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct fifo_gk20a *f = &g->fifo;
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_channel_config_params *p =
+                       &msg.params.channel_config;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT;
+       msg.handle = platform->virt_handle;
+       p->handle = f->channel[hw_chid].virt_ctx;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       if (err || msg.ret) {
+               gk20a_err(dev_from_gk20a(g),
+                       "preempt channel %d failed\n", hw_chid);
+               err = -ENOMEM;
+       }
+
+       return err;
+}
+
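+/* The runlist is sent to the server as a variable-length message: a fixed
+ * tegra_vgpu_cmd_msg header followed by num_entries 16-bit channel ids. */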
+static int vgpu_submit_runlist(u64 handle, u8 runlist_id, u16 *runlist,
+                       u32 num_entries)
+{
+       struct tegra_vgpu_cmd_msg *msg;
+       struct tegra_vgpu_runlist_params *p;
+       size_t size = sizeof(*msg) + sizeof(*runlist) * num_entries;
+       char *ptr;
+       int err;
+
+       msg = kmalloc(size, GFP_KERNEL);
+       if (!msg)
+               return -1;
+
+       msg->cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST;
+       msg->handle = handle;
+       p = &msg->params.runlist;
+       p->runlist_id = runlist_id;
+       p->num_entries = num_entries;
+
+       ptr = (char *)msg + sizeof(*msg);
+       memcpy(ptr, runlist, sizeof(*runlist) * num_entries);
+       err = vgpu_comm_sendrecv(msg, size, sizeof(*msg));
+
+       err = (err || msg->ret) ? -1 : 0;
+       kfree(msg);
+       return err;
+}
+
+static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+                                       u32 hw_chid, bool add,
+                                       bool wait_for_finish)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct fifo_gk20a *f = &g->fifo;
+       struct fifo_runlist_info_gk20a *runlist;
+       u16 *runlist_entry = NULL;
+       u32 count = 0;
+
+       gk20a_dbg_fn("");
+
+       runlist = &f->runlist_info[runlist_id];
+
+       /* valid channel, add/remove it from active list.
+          Otherwise, keep active list untouched for suspend/resume. */
+       if (hw_chid != ~0) {
+               if (add) {
+                       if (test_and_set_bit(hw_chid,
+                               runlist->active_channels) == 1)
+                               return 0;
+               } else {
+                       if (test_and_clear_bit(hw_chid,
+                               runlist->active_channels) == 0)
+                               return 0;
+               }
+       }
+
+       if (hw_chid != ~0 || /* add/remove a valid channel */
+           add /* resume to add all channels back */) {
+               u32 chid;
+
+               runlist_entry = runlist->mem[0].cpuva;
+               for_each_set_bit(chid,
+                       runlist->active_channels, f->num_channels) {
+                       gk20a_dbg_info("add channel %d to runlist", chid);
+                       runlist_entry[0] = chid;
+                       runlist_entry++;
+                       count++;
+               }
+       } else  /* suspend to remove all channels */
+               count = 0;
+
+       return vgpu_submit_runlist(platform->virt_handle, runlist_id,
+                               runlist->mem[0].cpuva, count);
+}
+
+/* add/remove a channel from the runlist.
+   Special cases below (runlist->active_channels will NOT be changed):
+   (hw_chid == ~0 && !add) means remove all active channels from runlist.
+   (hw_chid == ~0 &&  add) means restore all active channels on runlist. */
+static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id,
+                               u32 hw_chid, bool add, bool wait_for_finish)
+{
+       struct fifo_runlist_info_gk20a *runlist = NULL;
+       struct fifo_gk20a *f = &g->fifo;
+       int ret = 0;
+
+       gk20a_dbg_fn("");
+
+       runlist = &f->runlist_info[runlist_id];
+
+       mutex_lock(&runlist->mutex);
+
+       ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
+                                       wait_for_finish);
+
+       mutex_unlock(&runlist->mutex);
+       return ret;
+}
+
+static int vgpu_fifo_wait_engine_idle(struct gk20a *g)
+{
+       gk20a_dbg_fn("");
+
+       return 0;
+}
+
+void vgpu_init_fifo_ops(struct gpu_ops *gops)
+{
+       gops->fifo.bind_channel = vgpu_channel_bind;
+       gops->fifo.unbind_channel = vgpu_channel_unbind;
+       gops->fifo.disable_channel = vgpu_channel_disable;
+       gops->fifo.alloc_inst = vgpu_channel_alloc_inst;
+       gops->fifo.free_inst = vgpu_channel_free_inst;
+       gops->fifo.setup_ramfc = vgpu_channel_setup_ramfc;
+       gops->fifo.preempt_channel = vgpu_fifo_preempt_channel;
+       gops->fifo.update_runlist = vgpu_fifo_update_runlist;
+       gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle;
+}
+
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
new file mode 100644 (file)
index 0000000..a7e966d
--- /dev/null
@@ -0,0 +1,687 @@
+/*
+ * Virtualized GPU Graphics
+ *
+ * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "vgpu/vgpu.h"
+#include "gk20a/hw_gr_gk20a.h"
+
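+/* Gr context operations are forwarded to the server: these helpers send the
+ * corresponding TEGRA_VGPU_CMD_CHANNEL_* request for the channel's virtual
+ * context handle. */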
+static int vgpu_gr_commit_inst(struct channel_gk20a *c, u64 gpu_va)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_CTX;
+       msg.handle = platform->virt_handle;
+       p->handle = c->virt_ctx;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       return (err || msg.ret) ? -1 : 0;
+}
+
+static int vgpu_gr_commit_global_ctx_buffers(struct gk20a *g,
+                                       struct channel_gk20a *c, bool patch)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_COMMIT_GR_GLOBAL_CTX;
+       msg.handle = platform->virt_handle;
+       p->handle = c->virt_ctx;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       return (err || msg.ret) ? -1 : 0;
+}
+
+/* load a saved copy of the golden image into the channel gr_ctx */
+static int vgpu_gr_load_golden_ctx_image(struct gk20a *g,
+                                       struct channel_gk20a *c)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_LOAD_GR_GOLDEN_CTX;
+       msg.handle = platform->virt_handle;
+       p->handle = c->virt_ctx;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       return (err || msg.ret) ? -1 : 0;
+}
+
+static int vgpu_gr_init_ctx_state(struct gk20a *g, struct gr_gk20a *gr)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+
+       gk20a_dbg_fn("");
+
+       vgpu_get_attribute(platform->virt_handle,
+                       TEGRA_VGPU_ATTRIB_GOLDEN_CTX_SIZE,
+                       &g->gr.ctx_vars.golden_image_size);
+       vgpu_get_attribute(platform->virt_handle,
+                       TEGRA_VGPU_ATTRIB_ZCULL_CTX_SIZE,
+                       &g->gr.ctx_vars.zcull_ctxsw_image_size);
+       if (!g->gr.ctx_vars.golden_image_size ||
+               !g->gr.ctx_vars.zcull_ctxsw_image_size)
+               return -ENXIO;
+
+       gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size;
+       g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
+       return 0;
+}
+
+static int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g)
+{
+       struct gr_gk20a *gr = &g->gr;
+       int attr_buffer_size;
+
+       u32 cb_buffer_size = gr->bundle_cb_default_size *
+               gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
+
+       u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() *
+               gr_scc_pagepool_total_pages_byte_granularity_v();
+
+       gk20a_dbg_fn("");
+
+       attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g);
+
+       gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size);
+       gr->global_ctx_buffer[CIRCULAR].size = cb_buffer_size;
+
+       gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);
+       gr->global_ctx_buffer[PAGEPOOL].size = pagepool_buffer_size;
+
+       gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);
+       gr->global_ctx_buffer[ATTRIBUTE].size = attr_buffer_size;
+
+       gk20a_dbg_info("priv access map size : %d",
+               gr->ctx_vars.priv_access_map_size);
+       gr->global_ctx_buffer[PRIV_ACCESS_MAP].size =
+               gr->ctx_vars.priv_access_map_size;
+
+       return 0;
+}
+
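+/* Allocate GPU VAs for the global context buffers in the channel's VM and
+ * hand them to the server to be mapped. */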
+static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
+                                       struct channel_gk20a *c)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+       struct vm_gk20a *ch_vm = c->vm;
+       u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
+       u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+       struct gr_gk20a *gr = &g->gr;
+       u64 gpu_va;
+       u32 i;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       /* FIXME: add VPR support */
+
+       /* Circular Buffer */
+       gpu_va = gk20a_vm_alloc_va(ch_vm,
+                               gr->global_ctx_buffer[CIRCULAR].size, 0);
+
+       if (!gpu_va)
+               goto clean_up;
+       g_bfr_va[CIRCULAR_VA] = gpu_va;
+       g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].size;
+
+       /* Attribute Buffer */
+       gpu_va = gk20a_vm_alloc_va(ch_vm,
+                               gr->global_ctx_buffer[ATTRIBUTE].size, 0);
+
+       if (!gpu_va)
+               goto clean_up;
+       g_bfr_va[ATTRIBUTE_VA] = gpu_va;
+       g_bfr_size[ATTRIBUTE_VA] = gr->global_ctx_buffer[ATTRIBUTE].size;
+
+       /* Page Pool */
+       gpu_va = gk20a_vm_alloc_va(ch_vm,
+                               gr->global_ctx_buffer[PAGEPOOL].size, 0);
+       if (!gpu_va)
+               goto clean_up;
+       g_bfr_va[PAGEPOOL_VA] = gpu_va;
+       g_bfr_size[PAGEPOOL_VA] = gr->global_ctx_buffer[PAGEPOOL].size;
+
+       /* Priv register Access Map */
+       gpu_va = gk20a_vm_alloc_va(ch_vm,
+                               gr->global_ctx_buffer[PRIV_ACCESS_MAP].size, 0);
+       if (!gpu_va)
+               goto clean_up;
+       g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
+       g_bfr_size[PRIV_ACCESS_MAP_VA] =
+               gr->global_ctx_buffer[PRIV_ACCESS_MAP].size;
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX;
+       msg.handle = platform->virt_handle;
+       p->handle = c->virt_ctx;
+       p->cb_va = g_bfr_va[CIRCULAR_VA];
+       p->attr_va = g_bfr_va[ATTRIBUTE_VA];
+       p->page_pool_va = g_bfr_va[PAGEPOOL_VA];
+       p->priv_access_map_va = g_bfr_va[PRIV_ACCESS_MAP_VA];
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       if (err || msg.ret)
+               goto clean_up;
+
+       c->ch_ctx.global_ctx_buffer_mapped = true;
+       return 0;
+
+ clean_up:
+       for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
+               if (g_bfr_va[i]) {
+                       gk20a_vm_free_va(ch_vm, g_bfr_va[i],
+                                       g_bfr_size[i], 0);
+                       g_bfr_va[i] = 0;
+               }
+       }
+       return -ENOMEM;
+}
+
+static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
+       struct vm_gk20a *ch_vm = c->vm;
+       u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
+       u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
+       u32 i;
+
+       gk20a_dbg_fn("");
+
+       if (c->ch_ctx.global_ctx_buffer_mapped) {
+               struct tegra_vgpu_cmd_msg msg;
+               struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+               int err;
+
+               msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX;
+               msg.handle = platform->virt_handle;
+               p->handle = c->virt_ctx;
+               err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+               WARN_ON(err || msg.ret);
+       }
+
+       for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
+               if (g_bfr_va[i]) {
+                       gk20a_vm_free_va(ch_vm, g_bfr_va[i], g_bfr_size[i], 0);
+                       g_bfr_va[i] = 0;
+                       g_bfr_size[i] = 0;
+               }
+       }
+       c->ch_ctx.global_ctx_buffer_mapped = false;
+}
+
+static int vgpu_gr_alloc_channel_gr_ctx(struct gk20a *g,
+                                       struct channel_gk20a *c)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+       struct gr_gk20a *gr = &g->gr;
+       struct gr_ctx_desc *gr_ctx;
+       struct vm_gk20a *ch_vm = c->vm;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       if (gr->ctx_vars.buffer_size == 0)
+               return 0;
+
+       /* alloc channel gr ctx buffer */
+       gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
+       gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
+
+       gr_ctx = kzalloc(sizeof(*gr_ctx), GFP_KERNEL);
+       if (!gr_ctx)
+               return -ENOMEM;
+
+       gr_ctx->size = gr->ctx_vars.buffer_total_size;
+       gr_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, gr_ctx->size, 0);
+
+       if (!gr_ctx->gpu_va) {
+               kfree(gr_ctx);
+               return -ENOMEM;
+       }
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_CTX;
+       msg.handle = platform->virt_handle;
+       p->handle = c->virt_ctx;
+       p->gr_ctx_va = gr_ctx->gpu_va;
+       p->class_num = c->obj_class;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       if (err || msg.ret) {
+               gk20a_vm_free_va(ch_vm, gr_ctx->gpu_va, gr_ctx->size, 0);
+               err = -ENOMEM;
+       } else
+               c->ch_ctx.gr_ctx = gr_ctx;
+
+       return err;
+}
+
+static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
+       struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+       struct vm_gk20a *ch_vm = c->vm;
+
+       gk20a_dbg_fn("");
+
+       if (ch_ctx->gr_ctx && ch_ctx->gr_ctx->gpu_va) {
+               struct tegra_vgpu_cmd_msg msg;
+               struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+               int err;
+
+               msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_CTX;
+               msg.handle = platform->virt_handle;
+               p->handle = c->virt_ctx;
+               err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+               WARN_ON(err || msg.ret);
+
+               gk20a_vm_free_va(ch_vm, ch_ctx->gr_ctx->gpu_va,
+                               ch_ctx->gr_ctx->size, 0);
+               ch_ctx->gr_ctx->gpu_va = 0;
+               kfree(ch_ctx->gr_ctx);
+       }
+}
+
+static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
+                                       struct channel_gk20a *c)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
+       struct vm_gk20a *ch_vm = c->vm;
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       patch_ctx->size = 128 * sizeof(u32);
+       patch_ctx->gpu_va = gk20a_vm_alloc_va(ch_vm, patch_ctx->size, 0);
+       if (!patch_ctx->gpu_va)
+               return -ENOMEM;
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_GR_PATCH_CTX;
+       msg.handle = platform->virt_handle;
+       p->handle = c->virt_ctx;
+       p->patch_ctx_va = patch_ctx->gpu_va;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       if (err || msg.ret) {
+               gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
+               err = -ENOMEM;
+       }
+
+       return err;
+}
+
+static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
+       struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx;
+       struct vm_gk20a *ch_vm = c->vm;
+
+       gk20a_dbg_fn("");
+
+       if (patch_ctx->gpu_va) {
+               struct tegra_vgpu_cmd_msg msg;
+               struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
+               int err;
+
+               msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX;
+               msg.handle = platform->virt_handle;
+               p->handle = c->virt_ctx;
+               err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+               WARN_ON(err || msg.ret);
+
+               gk20a_vm_free_va(ch_vm, patch_ctx->gpu_va, patch_ctx->size, 0);
+               patch_ctx->gpu_va = 0;
+       }
+}
+
+static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
+{
+       gk20a_dbg_fn("");
+
+       vgpu_gr_unmap_global_ctx_buffers(c);
+       vgpu_gr_free_channel_patch_ctx(c);
+       if (!gk20a_is_channel_marked_as_tsg(c))
+               vgpu_gr_free_channel_gr_ctx(c);
+
+       /* zcull_ctx, pm_ctx */
+
+       memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
+
+       c->num_objects = 0;
+       c->first_init = false;
+}
+
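+/* Object context allocation mirrors the native gk20a path: allocate the gr
+ * ctx, commit it, allocate the patch buffer, map and commit the global
+ * buffers, then load the golden image; each step is forwarded to the
+ * server. */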
+static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a  *c,
+                               struct nvhost_alloc_obj_ctx_args *args)
+{
+       struct gk20a *g = c->g;
+       struct fifo_gk20a *f = &g->fifo;
+       struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+       struct tsg_gk20a *tsg = NULL;
+       int err = 0;
+
+       gk20a_dbg_fn("");
+
+       /* an address space needs to have been bound at this point.*/
+       if (!gk20a_channel_as_bound(c)) {
+               gk20a_err(dev_from_gk20a(g),
+                          "not bound to address space at time"
+                          " of grctx allocation");
+               return -EINVAL;
+       }
+
+       if (!g->ops.gr.is_valid_class(g, args->class_num)) {
+               gk20a_err(dev_from_gk20a(g),
+                          "invalid obj class 0x%x", args->class_num);
+               err = -EINVAL;
+               goto out;
+       }
+       c->obj_class = args->class_num;
+
+       /* FIXME: add TSG support */
+       if (gk20a_is_channel_marked_as_tsg(c))
+               tsg = &f->tsg[c->tsgid];
+
+       /* allocate gr ctx buffer */
+       if (!ch_ctx->gr_ctx) {
+               err = vgpu_gr_alloc_channel_gr_ctx(g, c);
+               if (err) {
+                       gk20a_err(dev_from_gk20a(g),
+                               "fail to allocate gr ctx buffer");
+                       goto out;
+               }
+       } else {
+               /* TBD: be more precise about which class is being
+                * allocated, as some classes are allowed to be
+                * allocated on the same channel */
+               gk20a_err(dev_from_gk20a(g),
+                       "too many classes alloc'd on same channel");
+               err = -EINVAL;
+               goto out;
+       }
+
+       /* commit gr ctx buffer */
+       err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->gpu_va);
+       if (err) {
+               gk20a_err(dev_from_gk20a(g),
+                       "fail to commit gr ctx buffer");
+               goto out;
+       }
+
+       /* allocate patch buffer */
+       if (ch_ctx->patch_ctx.pages == NULL) {
+               err = vgpu_gr_alloc_channel_patch_ctx(g, c);
+               if (err) {
+                       gk20a_err(dev_from_gk20a(g),
+                               "fail to allocate patch buffer");
+                       goto out;
+               }
+       }
+
+       /* map global buffer to channel gpu_va and commit */
+       if (!ch_ctx->global_ctx_buffer_mapped) {
+               err = vgpu_gr_map_global_ctx_buffers(g, c);
+               if (err) {
+                       gk20a_err(dev_from_gk20a(g),
+                               "fail to map global ctx buffer");
+                       goto out;
+               }
+               gr_gk20a_elpg_protected_call(g,
+                               vgpu_gr_commit_global_ctx_buffers(g, c, true));
+       }
+
+       /* load golden image */
+       if (!c->first_init) {
+               err = gr_gk20a_elpg_protected_call(g,
+                               vgpu_gr_load_golden_ctx_image(g, c));
+               if (err) {
+                       gk20a_err(dev_from_gk20a(g),
+                               "fail to load golden ctx image");
+                       goto out;
+               }
+               c->first_init = true;
+       }
+
+       c->num_objects++;
+
+       gk20a_dbg_fn("done");
+       return 0;
+out:
+       /* 1. gr_ctx, patch_ctx and global ctx buffer mappings
+          can be reused, so there is no need to release them.
+          2. golden image load is a one-time thing, so if it
+          succeeded there is no need to undo it. */
+       gk20a_err(dev_from_gk20a(g), "fail");
+       return err;
+}
+
+static int vgpu_gr_free_obj_ctx(struct channel_gk20a  *c,
+                               struct nvhost_free_obj_ctx_args *args)
+{
+       unsigned long timeout = gk20a_get_gr_idle_timeout(c->g);
+
+       gk20a_dbg_fn("");
+
+       if (c->num_objects == 0)
+               return 0;
+
+       c->num_objects--;
+
+       if (c->num_objects == 0) {
+               c->first_init = false;
+               gk20a_disable_channel(c,
+                       !c->has_timedout,
+                       timeout);
+       }
+
+       return 0;
+}
+
+static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+
+       gk20a_dbg_fn("");
+
+       if (vgpu_get_attribute(platform->virt_handle,
+                       TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count))
+               return -ENOMEM;
+
+       if (vgpu_get_attribute(platform->virt_handle,
+                       TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT,
+                       &gr->max_tpc_per_gpc_count))
+               return -ENOMEM;
+
+       if (vgpu_get_attribute(platform->virt_handle,
+                       TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT,
+                       &gr->max_tpc_count))
+               return -ENOMEM;
+
+       g->ops.gr.bundle_cb_defaults(g);
+       g->ops.gr.cb_size_default(g);
+       g->ops.gr.calc_global_ctx_buffer_size(g);
+       return 0;
+}
+
+static int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
+                               struct channel_gk20a *c, u64 zcull_va,
+                               u32 mode)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_zcull_bind_params *p = &msg.params.zcull_bind;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_ZCULL;
+       msg.handle = platform->virt_handle;
+       p->handle = c->virt_ctx;
+       p->zcull_va = zcull_va;
+       p->mode = mode;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       return (err || msg.ret) ? -ENOMEM : 0;
+}
+
+static int vgpu_gr_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
+                               struct gr_zcull_info *zcull_params)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_zcull_info_params *p = &msg.params.zcull_info;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       msg.cmd = TEGRA_VGPU_CMD_GET_ZCULL_INFO;
+       msg.handle = platform->virt_handle;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       if (err || msg.ret)
+               return -ENOMEM;
+
+       zcull_params->width_align_pixels = p->width_align_pixels;
+       zcull_params->height_align_pixels = p->height_align_pixels;
+       zcull_params->pixel_squares_by_aliquots = p->pixel_squares_by_aliquots;
+       zcull_params->aliquot_total = p->aliquot_total;
+       zcull_params->region_byte_multiplier = p->region_byte_multiplier;
+       zcull_params->region_header_size = p->region_header_size;
+       zcull_params->subregion_header_size = p->subregion_header_size;
+       zcull_params->subregion_width_align_pixels =
+               p->subregion_width_align_pixels;
+       zcull_params->subregion_height_align_pixels =
+               p->subregion_height_align_pixels;
+       zcull_params->subregion_count = p->subregion_count;
+
+       return 0;
+}
+
+static void vgpu_remove_gr_support(struct gr_gk20a *gr)
+{
+       gk20a_dbg_fn("");
+
+       gk20a_allocator_destroy(&gr->comp_tags);
+}
+
+static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
+{
+       struct gr_gk20a *gr = &g->gr;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       if (gr->sw_ready) {
+               gk20a_dbg_fn("skip init");
+               return 0;
+       }
+
+       gr->g = g;
+
+       err = vgpu_gr_init_gr_config(g, gr);
+       if (err)
+               goto clean_up;
+
+       err = vgpu_gr_init_ctx_state(g, gr);
+       if (err)
+               goto clean_up;
+
+       err = g->ops.ltc.init_comptags(g, gr);
+       if (err)
+               goto clean_up;
+
+       err = vgpu_gr_alloc_global_ctx_buffers(g);
+       if (err)
+               goto clean_up;
+
+       mutex_init(&gr->ctx_mutex);
+
+       gr->remove_support = vgpu_remove_gr_support;
+       gr->sw_ready = true;
+
+       gk20a_dbg_fn("done");
+       return 0;
+
+clean_up:
+       gk20a_err(dev_from_gk20a(g), "fail");
+       vgpu_remove_gr_support(gr);
+       return err;
+}
+
+int vgpu_init_gr_support(struct gk20a *g)
+{
+       gk20a_dbg_fn("");
+
+       return vgpu_gr_init_gr_setup_sw(g);
+}
+
+struct gr_isr_data {
+       u32 addr;
+       u32 data_lo;
+       u32 data_hi;
+       u32 curr_ctx;
+       u32 chid;
+       u32 offset;
+       u32 sub_chan;
+       u32 class_num;
+};
+
+static int vgpu_gr_handle_notify_pending(struct gk20a *g,
+                                       struct gr_isr_data *isr_data)
+{
+       struct fifo_gk20a *f = &g->fifo;
+       struct channel_gk20a *ch = &f->channel[isr_data->chid];
+
+       gk20a_dbg_fn("");
+       wake_up(&ch->notifier_wq);
+       return 0;
+}
+
+int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
+{
+       struct gr_isr_data isr_data;
+
+       gk20a_dbg_fn("");
+
+       isr_data.chid = info->chid;
+
+       if (info->type == TEGRA_VGPU_GR_INTR_NOTIFY)
+               vgpu_gr_handle_notify_pending(g, &isr_data);
+
+       return 0;
+}
+
+void vgpu_init_gr_ops(struct gpu_ops *gops)
+{
+       gops->gr.free_channel_ctx = vgpu_gr_free_channel_ctx;
+       gops->gr.alloc_obj_ctx = vgpu_gr_alloc_obj_ctx;
+       gops->gr.free_obj_ctx = vgpu_gr_free_obj_ctx;
+       gops->gr.bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull;
+       gops->gr.get_zcull_info = vgpu_gr_get_zcull_info;
+}
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
new file mode 100644 (file)
index 0000000..ddff23b
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Virtualized GPU L2
+ *
+ * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "vgpu/vgpu.h"
+
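+/* L2 cache parameters come from the server: the cache size and the number
+ * of comptag lines are queried as vgpu attributes. */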
+static int vgpu_determine_L2_size_bytes(struct gk20a *g)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       u32 cache_size = 0;
+
+       gk20a_dbg_fn("");
+
+       if (vgpu_get_attribute(platform->virt_handle,
+                       TEGRA_VGPU_ATTRIB_L2_SIZE, &cache_size))
+               dev_err(dev_from_gk20a(g), "unable to get L2 size");
+
+       return cache_size;
+}
+
+static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       u32 max_comptag_lines = 0;
+
+       gk20a_dbg_fn("");
+
+       vgpu_get_attribute(platform->virt_handle,
+                       TEGRA_VGPU_ATTRIB_COMPTAG_LINES, &max_comptag_lines);
+       if (max_comptag_lines < 2)
+               return -ENXIO;
+
+       gk20a_allocator_init(&gr->comp_tags, "comptag",
+                             1, /* start */
+                             max_comptag_lines - 1, /* length*/
+                             1); /* align */
+       return 0;
+}
+
+void vgpu_init_ltc_ops(struct gpu_ops *gops)
+{
+       gops->ltc.determine_L2_size_bytes = vgpu_determine_L2_size_bytes;
+       gops->ltc.init_comptags = vgpu_ltc_init_comptags;
+}
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
new file mode 100644 (file)
index 0000000..6ed1dec
--- /dev/null
@@ -0,0 +1,425 @@
+/*
+ * Virtualized GPU Memory Management
+ *
+ * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/dma-mapping.h>
+#include "vgpu/vgpu.h"
+
+/* note: keep the page sizes sorted lowest to highest here */
+static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
+static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
+
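+/* SW MM setup only records the VM geometry (page sizes, PDE stride and
+ * channel VA range); GMMU updates are requested from the server by the
+ * map/unmap helpers below. */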
+static int vgpu_init_mm_setup_sw(struct gk20a *g)
+{
+       struct mm_gk20a *mm = &g->mm;
+
+       gk20a_dbg_fn("");
+
+       if (mm->sw_ready) {
+               gk20a_dbg_fn("skip init");
+               return 0;
+       }
+
+       mm->g = g;
+       mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
+       mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
+       mm->pde_stride    = mm->big_page_size << 10;
+       mm->pde_stride_shift = ilog2(mm->pde_stride);
+       BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
+
+       /* TBD: make channel vm size configurable */
+       mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
+
+       gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
+
+       mm->sw_ready = true;
+
+       return 0;
+}
+
+int vgpu_init_mm_support(struct gk20a *g)
+{
+       gk20a_dbg_fn("");
+
+       return vgpu_init_mm_setup_sw(g);
+}
+
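+/* Map a buffer into the channel VM: the GPU VA is allocated locally (unless
+ * a fixed map_offset was supplied) and the actual GMMU update is requested
+ * from the server with TEGRA_VGPU_CMD_AS_MAP. */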
+static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
+                               u64 map_offset,
+                               struct sg_table *sgt,
+                               u64 buffer_offset,
+                               u64 size,
+                               int pgsz_idx,
+                               u8 kind_v,
+                               u32 ctag_offset,
+                               u32 flags,
+                               int rw_flag,
+                               bool clear_ctags)
+{
+       int err = 0;
+       struct device *d = dev_from_vm(vm);
+       struct gk20a *g = gk20a_from_vm(vm);
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
+       u64 addr = gk20a_mm_iova_addr(sgt->sgl);
+       u8 prot;
+
+       gk20a_dbg_fn("");
+
+       /* Allocate (or validate when map_offset != 0) the virtual address. */
+       if (!map_offset) {
+               map_offset = gk20a_vm_alloc_va(vm, size,
+                                         pgsz_idx);
+               if (!map_offset) {
+                       gk20a_err(d, "failed to allocate va space");
+                       err = -ENOMEM;
+                       goto fail;
+               }
+       }
+
+       if (rw_flag == gk20a_mem_flag_read_only)
+               prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
+       else if (rw_flag == gk20a_mem_flag_write_only)
+               prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY;
+       else
+               prot = TEGRA_VGPU_MAP_PROT_NONE;
+
+       msg.cmd = TEGRA_VGPU_CMD_AS_MAP;
+       msg.handle = platform->virt_handle;
+       p->handle = vm->handle;
+       p->addr = addr;
+       p->gpu_va = map_offset;
+       p->size = size;
+       p->pgsz_idx = pgsz_idx;
+       p->iova = mapping ? 1 : 0;
+       p->kind = kind_v;
+       p->cacheable =
+               (flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0;
+       p->prot = prot;
+       p->ctag_offset = ctag_offset;
+       p->clear_ctags = clear_ctags;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       if (err || msg.ret)
+               goto fail;
+
+       vm->tlb_dirty = true;
+       return map_offset;
+fail:
+       gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
+       return 0;
+}
+
+static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
+                               u64 vaddr,
+                               u64 size,
+                               int pgsz_idx,
+                               bool va_allocated,
+                               int rw_flag)
+{
+       struct gk20a *g = gk20a_from_vm(vm);
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       if (va_allocated) {
+               err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
+               if (err) {
+                       dev_err(dev_from_vm(vm),
+                               "failed to free va");
+                       return;
+               }
+       }
+
+       msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP;
+       msg.handle = platform->virt_handle;
+       p->handle = vm->handle;
+       p->gpu_va = vaddr;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       if (err || msg.ret)
+               dev_err(dev_from_vm(vm),
+                       "failed to update gmmu ptes on unmap");
+
+       vm->tlb_dirty = true;
+}
+
+static void vgpu_vm_remove_support(struct vm_gk20a *vm)
+{
+       struct gk20a *g = vm->mm->g;
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct mapped_buffer_node *mapped_buffer;
+       struct vm_reserved_va_node *va_node, *va_node_tmp;
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
+       struct rb_node *node;
+       int err;
+
+       gk20a_dbg_fn("");
+       mutex_lock(&vm->update_gmmu_lock);
+
+       /* TBD: add a flag here for the unmap code to recognize teardown
+        * and short-circuit any otherwise expensive operations. */
+
+       node = rb_first(&vm->mapped_buffers);
+       while (node) {
+               mapped_buffer =
+                       container_of(node, struct mapped_buffer_node, node);
+               gk20a_vm_unmap_locked(mapped_buffer);
+               node = rb_first(&vm->mapped_buffers);
+       }
+
+       /* destroy remaining reserved memory areas */
+       list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
+               reserved_va_list) {
+               list_del(&va_node->reserved_va_list);
+               kfree(va_node);
+       }
+
+       msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
+       msg.handle = platform->virt_handle;
+       p->handle = vm->handle;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       WARN_ON(err || msg.ret);
+
+       gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+       gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+
+       mutex_unlock(&vm->update_gmmu_lock);
+
+       /* release zero page if used */
+       if (vm->zero_page_cpuva)
+               dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
+                                 vm->zero_page_cpuva, vm->zero_page_iova);
+
+       /* vm is not used anymore. release it. */
+       kfree(vm);
+}
+
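+/* BAR1 mappings (e.g. for USERD) are also established by the server; the
+ * resulting GPU VA is returned in the reply message. */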
+u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct dma_iommu_mapping *mapping =
+                       to_dma_iommu_mapping(dev_from_gk20a(g));
+       u64 addr = gk20a_mm_iova_addr((*sgt)->sgl);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
+       int err;
+
+       msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1;
+       msg.handle = platform->virt_handle;
+       p->addr = addr;
+       p->size = size;
+       p->iova = mapping ? 1 : 0;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       if (err || msg.ret)
+               addr = 0;
+       else
+               addr = p->gpu_va;
+
+       return addr;
+}
+
+/* address space interfaces for the gk20a module */
+static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share)
+{
+       struct gk20a_as *as = as_share->as;
+       struct gk20a *g = gk20a_from_as(as);
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
+       struct mm_gk20a *mm = &g->mm;
+       struct vm_gk20a *vm;
+       u64 vma_size;
+       u32 num_pages, low_hole_pages;
+       char name[32];
+       int err;
+
+       gk20a_dbg_fn("");
+
+       vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+       if (!vm)
+               return -ENOMEM;
+
+       as_share->vm = vm;
+
+       vm->mm = mm;
+       vm->as_share = as_share;
+
+       vm->big_pages = true;
+
+       vm->va_start  = mm->pde_stride;   /* create a one pde hole */
+       vm->va_limit  = mm->channel.size; /* note this means channel.size is
+                                            really just the max */
+
+       msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
+       msg.handle = platform->virt_handle;
+       p->size = vm->va_limit;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       if (err || msg.ret) {
+               as_share->vm = NULL;
+               kfree(vm);
+               return -ENOMEM;
+       }
+
+       vm->handle = p->handle;
+
+       /* low-half: alloc small pages */
+       /* high-half: alloc big pages */
+       vma_size = mm->channel.size >> 1;
+
+       snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
+                gmmu_page_sizes[gmmu_page_size_small]>>10);
+       num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
+
+       /* num_pages above is without regard to the low-side hole. */
+       low_hole_pages = (vm->va_start >>
+                         gmmu_page_shifts[gmmu_page_size_small]);
+
+       gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
+             low_hole_pages,             /* start */
+             num_pages - low_hole_pages, /* length */
+             1);                         /* align */
+
+       snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
+                gmmu_page_sizes[gmmu_page_size_big]>>10);
+
+       num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
+       gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
+                             num_pages, /* start */
+                             num_pages, /* length */
+                             1); /* align */
+
+       vm->mapped_buffers = RB_ROOT;
+
+       mutex_init(&vm->update_gmmu_lock);
+       kref_init(&vm->ref);
+       INIT_LIST_HEAD(&vm->reserved_va_list);
+
+       vm->enable_ctag = true;
+
+       return 0;
+}
+
+static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
+                               struct channel_gk20a *ch)
+{
+       struct vm_gk20a *vm = as_share->vm;
+       struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       ch->vm = vm;
+       msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE;
+       msg.handle = platform->virt_handle;
+       p->as_handle = vm->handle;
+       p->chan_handle = ch->virt_ctx;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       if (err || msg.ret) {
+               ch->vm = NULL;
+               err = -ENOMEM;
+       }
+
+       return err;
+}
+
+static void vgpu_cache_maint(u64 handle, u8 op)
+{
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint;
+       int err;
+
+       msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT;
+       msg.handle = handle;
+       p->op = op;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       WARN_ON(err || msg.ret);
+}
+
+static int vgpu_mm_fb_flush(struct gk20a *g)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+
+       gk20a_dbg_fn("");
+
+       vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_FB_FLUSH);
+       return 0;
+}
+
+static void vgpu_mm_l2_invalidate(struct gk20a *g)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+
+       gk20a_dbg_fn("");
+
+       vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_L2_MAINT_INV);
+}
+
+static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate)
+{
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       u8 op;
+
+       gk20a_dbg_fn("");
+
+       if (invalidate)
+               op = TEGRA_VGPU_L2_MAINT_FLUSH_INV;
+       else
+               op = TEGRA_VGPU_L2_MAINT_FLUSH;
+
+       vgpu_cache_maint(platform->virt_handle, op);
+}
+
+static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
+{
+       struct gk20a *g = gk20a_from_vm(vm);
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_as_invalidate_params *p = &msg.params.as_invalidate;
+       int err;
+
+       gk20a_dbg_fn("");
+
+       /* No need to invalidate if tlb is clean */
+       mutex_lock(&vm->update_gmmu_lock);
+       if (!vm->tlb_dirty) {
+               mutex_unlock(&vm->update_gmmu_lock);
+               return;
+       }
+
+       msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
+       msg.handle = platform->virt_handle;
+       p->handle = vm->handle;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+       WARN_ON(err || msg.ret);
+       vm->tlb_dirty = false;
+       mutex_unlock(&vm->update_gmmu_lock);
+}
+
+void vgpu_init_mm_ops(struct gpu_ops *gops)
+{
+       gops->mm.gmmu_map = vgpu_locked_gmmu_map;
+       gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
+       gops->mm.vm_remove = vgpu_vm_remove_support;
+       gops->mm.vm_alloc_share = vgpu_vm_alloc_share;
+       gops->mm.vm_bind_channel = vgpu_vm_bind_channel;
+       gops->mm.fb_flush = vgpu_mm_fb_flush;
+       gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
+       gops->mm.l2_flush = vgpu_mm_l2_flush;
+       gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
+}
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
new file mode 100644 (file)
index 0000000..cfe307f
--- /dev/null
@@ -0,0 +1,416 @@
+/*
+ * Virtualized GPU
+ *
+ * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/pm_runtime.h>
+#include "vgpu/vgpu.h"
+#include "gk20a/debug_gk20a.h"
+#include "gk20a/hal_gk20a.h"
+#include "gk20a/hw_mc_gk20a.h"
+
+static inline int vgpu_comm_init(struct platform_device *pdev)
+{
+       size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
+
+       return tegra_gr_comm_init(pdev, TEGRA_GR_COMM_CTX_CLIENT, 3,
+                               queue_sizes, TEGRA_VGPU_QUEUE_CMD,
+                               ARRAY_SIZE(queue_sizes));
+}
+
+static inline void vgpu_comm_deinit(void)
+{
+       size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
+
+       tegra_gr_comm_deinit(TEGRA_GR_COMM_CTX_CLIENT, TEGRA_VGPU_QUEUE_CMD,
+                       ARRAY_SIZE(queue_sizes));
+}
+
+int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
+               size_t size_out)
+{
+       void *handle;
+       size_t size = size_in;
+       void *data = msg;
+       int err;
+
+       err = tegra_gr_comm_sendrecv(TEGRA_GR_COMM_CTX_CLIENT,
+                               tegra_gr_comm_get_server_vmid(),
+                               TEGRA_VGPU_QUEUE_CMD, &handle, &data, &size);
+       if (!err) {
+               WARN_ON(size < size_out);
+               memcpy(msg, data, size_out);
+               tegra_gr_comm_release(handle);
+       }
+
+       return err;
+}
+
+static u64 vgpu_connect(void)
+{
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_connect_params *p = &msg.params.connect;
+       int err;
+
+       msg.cmd = TEGRA_VGPU_CMD_CONNECT;
+       p->module = TEGRA_VGPU_MODULE_GPU;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       return (err || msg.ret) ? 0 : p->handle;
+}
+
+int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value)
+{
+       struct tegra_vgpu_cmd_msg msg;
+       struct tegra_vgpu_attrib_params *p = &msg.params.attrib;
+       int err;
+
+       msg.cmd = TEGRA_VGPU_CMD_GET_ATTRIBUTE;
+       msg.handle = handle;
+       p->attrib = attrib;
+       err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
+
+       if (err || msg.ret)
+               return -1;
+
+       *value = p->value;
+       return 0;
+}
+
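+/* Interrupts are delivered by the server as messages on the INTR queue.
+ * This thread dispatches them (currently only gr interrupts) and exits
+ * once an ABORT event is posted during teardown. */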
+static int vgpu_intr_thread(void *dev_id)
+{
+       struct gk20a *g = dev_id;
+
+       while (true) {
+               struct tegra_vgpu_intr_msg *msg;
+               u32 sender;
+               void *handle;
+               size_t size;
+               int err;
+
+               err = tegra_gr_comm_recv(TEGRA_GR_COMM_CTX_CLIENT,
+                                       TEGRA_VGPU_QUEUE_INTR, &handle,
+                                       (void **)&msg, &size, &sender);
+               if (WARN_ON(err))
+                       continue;
+
+               if (msg->event == TEGRA_VGPU_EVENT_ABORT) {
+                       tegra_gr_comm_release(handle);
+                       break;
+               }
+
+               if (msg->unit == TEGRA_VGPU_INTR_GR)
+                       vgpu_gr_isr(g, &msg->info.gr_intr);
+
+               tegra_gr_comm_release(handle);
+       }
+
+       while (!kthread_should_stop())
+               msleep(10);
+       return 0;
+}
+
+static void vgpu_remove_support(struct platform_device *dev)
+{
+       struct gk20a *g = get_gk20a(dev);
+       struct gk20a_platform *platform = gk20a_get_platform(dev);
+       struct tegra_vgpu_intr_msg msg;
+       int err;
+
+       if (g->pmu.remove_support)
+               g->pmu.remove_support(&g->pmu);
+
+       if (g->gr.remove_support)
+               g->gr.remove_support(&g->gr);
+
+       if (g->fifo.remove_support)
+               g->fifo.remove_support(&g->fifo);
+
+       if (g->mm.remove_support)
+               g->mm.remove_support(&g->mm);
+
+       msg.event = TEGRA_VGPU_EVENT_ABORT;
+       err = tegra_gr_comm_send(TEGRA_GR_COMM_CTX_CLIENT,
+                               TEGRA_GR_COMM_ID_SELF, TEGRA_VGPU_QUEUE_INTR,
+                               &msg, sizeof(msg));
+       WARN_ON(err);
+       kthread_stop(platform->intr_handler);
+
+       /* free mappings to registers, etc*/
+
+       if (g->bar1) {
+               iounmap(g->bar1);
+               g->bar1 = 0;
+       }
+}
+
+static int vgpu_init_support(struct platform_device *dev)
+{
+       struct resource *r = platform_get_resource(dev, IORESOURCE_MEM, 0);
+       struct gk20a *g = get_gk20a(dev);
+       int err = 0;
+
+       if (!r) {
+               dev_err(dev_from_gk20a(g), "failed to get gk20a bar1\n");
+               err = -ENXIO;
+               goto fail;
+       }
+
+       g->bar1 = devm_request_and_ioremap(&dev->dev, r);
+       if (!g->bar1) {
+               dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
+               err = -ENXIO;
+               goto fail;
+       }
+
+       mutex_init(&g->dbg_sessions_lock);
+       mutex_init(&g->client_lock);
+
+       g->remove_support = vgpu_remove_support;
+       return 0;
+
+ fail:
+       vgpu_remove_support(dev);
+       return err;
+}
+
+int vgpu_pm_prepare_poweroff(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct gk20a *g = get_gk20a(pdev);
+       int ret = 0;
+
+       gk20a_dbg_fn("");
+
+       if (!g->power_on)
+               return 0;
+
+       ret = gk20a_channel_suspend(g);
+       if (ret)
+               return ret;
+
+       g->power_on = false;
+
+       return ret;
+}
+
+static void vgpu_detect_chip(struct gk20a *g)
+{
+       struct nvhost_gpu_characteristics *gpu = &g->gpu_characteristics;
+       struct gk20a_platform *platform = gk20a_get_platform(g->dev);
+
+       u32 mc_boot_0_value;
+
+       if (vgpu_get_attribute(platform->virt_handle,
+                       TEGRA_VGPU_ATTRIB_PMC_BOOT_0,
+                       &mc_boot_0_value)) {
+               gk20a_err(dev_from_gk20a(g), "failed to detect chip");
+               return;
+       }
+
+       gpu->arch = mc_boot_0_architecture_v(mc_boot_0_value) <<
+               NVHOST_GPU_ARCHITECTURE_SHIFT;
+       gpu->impl = mc_boot_0_implementation_v(mc_boot_0_value);
+       gpu->rev =
+               (mc_boot_0_major_revision_v(mc_boot_0_value) << 4) |
+               mc_boot_0_minor_revision_v(mc_boot_0_value);
+
+       gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
+                       g->gpu_characteristics.arch,
+                       g->gpu_characteristics.impl,
+                       g->gpu_characteristics.rev);
+}
+
+static int vgpu_init_hal(struct gk20a *g)
+{
+       u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
+
+       switch (ver) {
+       case GK20A_GPUID_GK20A:
+               gk20a_dbg_info("gk20a detected");
+               /* init gk20a ops then override with virt extensions */
+               gk20a_init_hal(&g->ops);
+               vgpu_init_fifo_ops(&g->ops);
+               vgpu_init_gr_ops(&g->ops);
+               vgpu_init_ltc_ops(&g->ops);
+               vgpu_init_mm_ops(&g->ops);
+               break;
+       default:
+               gk20a_err(&g->dev->dev, "no support for %x", ver);
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+int vgpu_pm_finalize_poweron(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct gk20a *g = get_gk20a(pdev);
+       int err;
+
+       gk20a_dbg_fn("");
+
+       if (g->power_on)
+               return 0;
+
+       g->power_on = true;
+
+       vgpu_detect_chip(g);
+       err = vgpu_init_hal(g);
+       if (err)
+               goto done;
+
+       err = vgpu_init_mm_support(g);
+       if (err) {
+               gk20a_err(dev, "failed to init gk20a mm");
+               goto done;
+       }
+
+       err = vgpu_init_fifo_support(g);
+       if (err) {
+               gk20a_err(dev, "failed to init gk20a fifo");
+               goto done;
+       }
+
+       err = vgpu_init_gr_support(g);
+       if (err) {
+               gk20a_err(dev, "failed to init gk20a gr");
+               goto done;
+       }
+
+       err = gk20a_init_gpu_characteristics(g);
+       if (err) {
+               gk20a_err(dev, "failed to init gk20a gpu characteristics");
+               goto done;
+       }
+
+       gk20a_channel_resume(g);
+
+done:
+       return err;
+}
+
+static int vgpu_pm_init(struct platform_device *dev)
+{
+       int err = 0;
+
+       gk20a_dbg_fn("");
+
+       pm_runtime_enable(&dev->dev);
+       return err;
+}
+
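+/* Probe path for the virtual GPU: set up the usual gk20a user and platform
+ * state, bring up the gr_comm channel to the server, connect to obtain the
+ * virt handle and start the interrupt dispatch thread. */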
+int vgpu_probe(struct platform_device *dev)
+{
+       struct gk20a *gk20a;
+       int err;
+       struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+       if (!platform) {
+               dev_err(&dev->dev, "no platform data\n");
+               return -ENODATA;
+       }
+
+       gk20a_dbg_fn("");
+
+       gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
+       if (!gk20a) {
+               dev_err(&dev->dev, "couldn't allocate gk20a support");
+               return -ENOMEM;
+       }
+
+       platform->g = gk20a;
+       gk20a->dev = dev;
+
+       err = gk20a_user_init(dev);
+       if (err)
+               return err;
+
+       err = vgpu_init_support(dev);
+       if (err)
+               return err;
+
+       init_rwsem(&gk20a->busy_lock);
+
+       spin_lock_init(&gk20a->mc_enable_lock);
+
+       /* Initialize the platform interface. */
+       err = platform->probe(dev);
+       if (err) {
+               dev_err(&dev->dev, "platform probe failed");
+               return err;
+       }
+
+       err = vgpu_pm_init(dev);
+       if (err) {
+               dev_err(&dev->dev, "pm init failed");
+               return err;
+       }
+
+       if (platform->late_probe) {
+               err = platform->late_probe(dev);
+               if (err) {
+                       dev_err(&dev->dev, "late probe failed");
+                       return err;
+               }
+       }
+
+       err = vgpu_comm_init(dev);
+       if (err) {
+               dev_err(&dev->dev, "failed to init comm interface\n");
+               return -ENOSYS;
+       }
+
+       platform->virt_handle = vgpu_connect();
+       if (!platform->virt_handle) {
+               dev_err(&dev->dev, "failed to connect to server node\n");
+               vgpu_comm_deinit();
+               return -ENOSYS;
+       }
+
+       platform->intr_handler = kthread_run(vgpu_intr_thread, gk20a, "gk20a");
+       if (IS_ERR(platform->intr_handler))
+               return -ENOMEM;
+
+       gk20a_debug_init(dev);
+
+       /* Set DMA parameters to allow larger sgt lists */
+       dev->dev.dma_parms = &gk20a->dma_parms;
+       dma_set_max_seg_size(&dev->dev, UINT_MAX);
+
+       gk20a->gr_idle_timeout_default =
+                       CONFIG_GK20A_DEFAULT_TIMEOUT;
+       gk20a->timeouts_enabled = true;
+
+       gk20a_create_sysfs(dev);
+       gk20a_init_gr(gk20a);
+
+       return 0;
+}
+
+int vgpu_remove(struct platform_device *dev)
+{
+       struct gk20a *g = get_gk20a(dev);
+       gk20a_dbg_fn("");
+
+       if (g->remove_support)
+               g->remove_support(dev);
+
+       vgpu_comm_deinit();
+       gk20a_user_deinit(dev);
+       gk20a_get_platform(dev)->g = NULL;
+       kfree(g);
+       return 0;
+}
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/vgpu/vgpu.h
new file mode 100644 (file)
index 0000000..445a1c9
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Virtualized GPU Interfaces
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VIRT_H_
+#define _VIRT_H_
+
+#include <linux/tegra_gr_comm.h>
+#include <linux/tegra_vgpu.h>
+#include "gk20a/gk20a.h"
+
+int vgpu_pm_prepare_poweroff(struct device *dev);
+int vgpu_pm_finalize_poweron(struct device *dev);
+int vgpu_probe(struct platform_device *dev);
+int vgpu_remove(struct platform_device *dev);
+u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size);
+int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info);
+void vgpu_init_fifo_ops(struct gpu_ops *gops);
+void vgpu_init_gr_ops(struct gpu_ops *gops);
+void vgpu_init_ltc_ops(struct gpu_ops *gops);
+void vgpu_init_mm_ops(struct gpu_ops *gops);
+int vgpu_init_mm_support(struct gk20a *g);
+int vgpu_init_gr_support(struct gk20a *g);
+int vgpu_init_fifo_support(struct gk20a *g);
+
+int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value);
+int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
+               size_t size_out);
+
+#endif