video: tegra: host: gk20a: Rework MMU fault
Arto Merilainen [Mon, 9 Sep 2013 08:40:31 +0000 (11:40 +0300)]
This patch updates the MMU fault handling sequence to the following:
1) Lock all runlists
2) Disable all channels whose context is on faulty engines.
3) Unlock runlists.
4) Reset the hardware
5) Update runlists.
6) Clear MMU faults
7) Restart scheduler

These modifications, overall, allow:
- faulting multiple engines at the same time. This scenario is
  possible if, e.g., a preempt or runlist update fails due to multiple
  busy engines.
- The old sequence reset the faulty engines before cleaning up the
  channels, which in some cases left the engines in an undefined
  state. Now that the reset is done after channel clean-up, we do not
  hit that issue.
- This patch adds support for multiple runlists. Even though we
  currently have only a single runlist, most code has assumed that
  there may be more runlists in the future. Therefore this patch
  also makes the MMU fault ISR consistent with that assumption.
- Channel pre-empt was removed. Pre-empt must not be done here, as it
  may leave the hardware in an inconsistent state.

Bug 1351268

Change-Id: I1770b7c4ce613ed106585dc7b143ec3f431dedc4
Signed-off-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-on: http://git-master/r/272025
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>

drivers/video/tegra/host/gk20a/channel_gk20a.c
drivers/video/tegra/host/gk20a/channel_gk20a.h
drivers/video/tegra/host/gk20a/fifo_gk20a.c

index da49f11..4ac456b 100644 (file)
@@ -366,27 +366,33 @@ static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
        return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
 }
 
-void gk20a_disable_channel(struct channel_gk20a *ch,
-                          bool finish,
-                          unsigned long finish_timeout)
+void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
 {
        struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
        struct nvhost_master *host = host_from_gk20a_channel(ch);
-       int err;
-
-       if (finish) {
-               err = gk20a_channel_finish(ch, finish_timeout);
-               WARN_ON(err);
-       }
 
        /* ensure no fences are pending */
        nvhost_syncpt_set_min_eq_max(&host->syncpt,
-                       ch->hw_chid + pdata->syncpt_base);
+                                    ch->hw_chid + pdata->syncpt_base);
 
        /* disable channel */
        gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
-               gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
-               ccsr_channel_enable_clr_true_f());
+                    gk20a_readl(ch->g,
+                    ccsr_channel_r(ch->hw_chid)) |
+                    ccsr_channel_enable_clr_true_f());
+}
+
+void gk20a_disable_channel(struct channel_gk20a *ch,
+                          bool finish,
+                          unsigned long finish_timeout)
+{
+       if (finish) {
+               int err = gk20a_channel_finish(ch, finish_timeout);
+               WARN_ON(err);
+       }
+
+       /* disable the channel from hw and increment syncpoints */
+       gk20a_disable_channel_no_update(ch);
 
        /* preempt the channel */
        gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
index 4162332..5ade025 100644 (file)
@@ -174,6 +174,7 @@ void gk20a_free_channel(struct nvhost_hwctx *ctx, bool finish);
 void gk20a_disable_channel(struct channel_gk20a *ch,
                           bool wait_for_finish,
                           unsigned long finish_timeout);
+void gk20a_disable_channel_no_update(struct channel_gk20a *ch);
 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout);
 int gk20a_channel_wait(struct channel_gk20a *ch,
                       struct nvhost_wait_args *args);
index 47634d5..75151c6 100644 (file)
@@ -802,11 +802,10 @@ static void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
 
 static void gk20a_fifo_handle_mmu_fault(struct gk20a *g)
 {
-       /* for storing (global) information about the fault */
        bool fake_fault;
        ulong fault_id;
-
        u32 engine_mmu_id;
+       int i;
 
        nvhost_dbg_fn("");
 
@@ -820,16 +819,22 @@ static void gk20a_fifo_handle_mmu_fault(struct gk20a *g)
        } else
                fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
 
+       /* lock all runlists */
+       for (i = 0; i < g->fifo.max_runlists; i++)
+               mutex_lock(&g->fifo.runlist_info[i].mutex);
+
+       /* go through all faulted engines */
        for_each_set_bit(engine_mmu_id, &fault_id, BITS_PER_LONG) {
                /* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
                 * engines. Convert engine_mmu_id to engine_id */
                u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id);
+               u32 runlist_id = g->fifo.engine_info[engine_id].runlist_id;
+               struct fifo_runlist_info_gk20a *runlist =
+                       &g->fifo.runlist_info[runlist_id];
                struct fifo_mmu_fault_info_gk20a f;
-
-               struct channel_gk20a *fault_ch = NULL;
+               struct channel_gk20a *ch = NULL;
 
                get_exception_mmu_fault_info(g, engine_id, &f);
-
                trace_nvhost_gk20a_mmu_fault(f.fault_hi_v,
                                             f.fault_lo_v,
                                             f.fault_info_v,
@@ -838,7 +843,6 @@ static void gk20a_fifo_handle_mmu_fault(struct gk20a *g)
                                             f.engine_subid_desc,
                                             f.client_desc,
                                             f.fault_type_desc);
-
                nvhost_err(dev_from_gk20a(g), "mmu fault on engine %d, "
                           "engined subid %d (%s), client %d (%s), "
                           "addr 0x%08x:0x%08x, type %d (%s), info 0x%08x,"
@@ -867,47 +871,67 @@ static void gk20a_fifo_handle_mmu_fault(struct gk20a *g)
                                fifo_engine_status_id_v(status);
 
                        if (type_ch) {
-                               fault_ch = g->fifo.channel + id;
+                               ch = g->fifo.channel + id;
                        } else {
                                nvhost_err(dev_from_gk20a(g), "TSG is not supported");
                                WARN_ON(1);
                        }
                } else {
                        /* read channel based on instruction pointer */
-                       fault_ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
+                       ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
                }
 
-               /* Reset engine and MMU fault */
-               gk20a_fifo_reset_engine(g, engine_id);
-
-               if (!fault_ch)
-                       fault_ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr);
-               if (fault_ch) {
-                       if (fault_ch->hwctx) {
-                               nvhost_dbg_fn("channel with hwctx has generated an mmu fault");
-                               fault_ch->hwctx->has_timedout = true;
+               if (ch) {
+                       if (ch->hwctx) {
+                               nvhost_err(dev_from_gk20a(g), "channel with hwctx has generated an mmu fault");
+                               ch->hwctx->has_timedout = true;
                        }
-                       if (fault_ch->in_use) {
-                               gk20a_disable_channel(fault_ch, false,
-                                               gk20a_get_gr_idle_timeout(g));
-                               fault_ch->hwctx->has_timedout = true;
+
+                       if (ch->in_use) {
+
+                               /* disable the channel from hw and increment
+                                * syncpoints */
+                               gk20a_disable_channel_no_update(ch);
+
+                               /* remove the channel from runlist */
+                               clear_bit(ch->hw_chid,
+                                         runlist->active_channels);
+
+                               /* mark channel as faulted */
+                               ch->hwctx->has_timedout = true;
                        }
                } else if (f.inst_ptr ==
                                sg_phys(g->mm.bar1.inst_block.mem.sgt->sgl)) {
-                       nvhost_dbg_fn("mmu fault from bar1");
+                       nvhost_err(dev_from_gk20a(g), "mmu fault from bar1");
                } else if (f.inst_ptr ==
                                sg_phys(g->mm.pmu.inst_block.mem.sgt->sgl)) {
-                       nvhost_dbg_fn("mmu fault from pmu");
+                       nvhost_err(dev_from_gk20a(g), "mmu fault from pmu");
                } else
-                       nvhost_dbg_fn("couldn't locate channel for mmu fault");
+                       nvhost_err(dev_from_gk20a(g), "couldn't locate channel for mmu fault");
+       }
 
-               /* clear mmu interrupt */
-               gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
+       /* unlock all runlists */
+       for (i = 0; i < g->fifo.max_runlists; i++)
+               mutex_unlock(&g->fifo.runlist_info[i].mutex);
 
-               /* resume scheduler */
-               gk20a_writel(g, fifo_error_sched_disable_r(),
-                               gk20a_readl(g, fifo_error_sched_disable_r()));
+       /* reset engines */
+       for_each_set_bit(engine_mmu_id, &fault_id, BITS_PER_LONG) {
+               u32 engine_id = gk20a_mmu_id_to_engine_id(engine_mmu_id);
+               gk20a_fifo_reset_engine(g, engine_id);
        }
+
+       /* update the runlists. Do not wait for runlist to start as
+        * the scheduler is currently disabled and it would therefore
+        * fail anyway */
+       for (i = 0; i < g->fifo.max_runlists; i++)
+               gk20a_fifo_update_runlist(g, i, ~0, true, false);
+
+       /* clear interrupt */
+       gk20a_writel(g, fifo_intr_mmu_fault_id_r(), fault_id);
+
+       /* resume scheduler */
+       gk20a_writel(g, fifo_error_sched_disable_r(),
+                    gk20a_readl(g, fifo_error_sched_disable_r()));
 }
 
 void gk20a_fifo_recover(struct gk20a *g, ulong engine_ids)