video: tegra: dc: window bandwidth calculations
Michael Frydrych [Tue, 2 Aug 2011 13:05:33 +0000 (16:05 +0300)]
Determine which windows are overlapping, and apply bandwidth calculations
for emc clock scaling and latency allowance appropriately.

Bug 856234
Bug 850602

Original-Change-Id: If587c46e8929b3885b25125f054f5cc2d22b2b58
Reviewed-on: http://git-master/r/44772
Reviewed-by: Varun Colbert <vcolbert@nvidia.com>
Tested-by: Varun Colbert <vcolbert@nvidia.com>

Rebase-Id: R841ce1734a7afab1311d3367a72ba9755d6d539c

drivers/video/tegra/dc/dc.c
drivers/video/tegra/dc/dc_priv.h
drivers/video/tegra/dc/overlay.c

index 5a7d1c9..ad18d89 100644 (file)
@@ -483,7 +483,7 @@ out:
        return ret;
 }
 
-static unsigned int tegra_dc_has_multiple_dc(void)
+unsigned int tegra_dc_has_multiple_dc(void)
 {
        unsigned int idx;
        unsigned int cnt = 0;
@@ -643,15 +643,15 @@ static void tegra_dc_set_latency_allowance(struct tegra_dc *dc,
        BUG_ON(dc->ndev->id >= ARRAY_SIZE(vfilter_tab));
        BUG_ON(w->idx >= ARRAY_SIZE(*la_id_tab));
 
+       /* our bandwidth is in bytes/sec, but LA takes MBps.
+        * round up bandwidth to 1MBps */
+       bw = w->new_bandwidth / 1000000 + 1;
+
        /* tegra_dc_get_bandwidth() treats V filter windows as double
         * bandwidth, but LA has a seperate client for V filter */
        if (w->idx == 1 && WIN_USE_V_FILTER(w))
                bw /= 2;
 
-       /* our bandwidth is in bytes/sec, but LA takes MBps.
-        * round up bandwidth to 1MBps */
-       bw = w->new_bandwidth / 1000000 + 1;
-
        tegra_set_latency_allowance(la_id_tab[dc->ndev->id][w->idx], bw);
 
        /* if window B, also set the 1B client for the 2-tap V filter. */
@@ -666,31 +666,66 @@ static unsigned int tegra_dc_windows_is_overlapped(struct tegra_dc_win *a,
 {
        if (!WIN_IS_ENABLED(a) || !WIN_IS_ENABLED(b))
                return 0;
+
+       /* because memory access to load the fifo can overlap, only care
+        * if windows overlap vertically */
        return ((a->out_y + a->out_h > b->out_y) && (a->out_y <= b->out_y)) ||
-              ((b->out_y + b->out_h > a->out_y) && (b->out_y <= a->out_y));
+               ((b->out_y + b->out_h > a->out_y) && (b->out_y <= a->out_y));
 }
 
-static unsigned int tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[],
-                                               int n)
+static unsigned long tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[],
+                                                int n)
 {
-       /* We have n windows and knows their geometries and bandwidthes. If any
-        * of them overlapped vertically, the overlapped area bandwidth get
-        * combined.
+       unsigned i;
+       unsigned j;
+       unsigned overlap_count;
+       unsigned max_bw = 0;
+
+       WARN_ONCE(n > 3, "Code assumes at most 3 windows, bandwidth is likely"
+                        "inaccurate.\n");
+
+       /* If we had a large number of windows, we would compute adjacency
+        * graph representing 2 window overlaps, find all cliques in the graph,
+        * assign bandwidth to each clique, and then select the clique with
+        * maximum bandwidth. But because we have at most 3 windows,
+        * implementing proper Bron-Kerbosh algorithm would be an overkill,
+        * brute force will suffice.
         *
-        * This function will find the maximum bandwidth of overlapped area.
-        * If there is no windows overlapped, then return the maximum
-        * bandwidth of windows.
+        * Thus: find maximum bandwidth for either single or a pair of windows
+        * and count number of window pair overlaps. If there are three
+        * pairs, all 3 window overlap.
         */
 
-       WARN_ONCE(n != 3, "Code assumes 3 windows, possibly reading junk.\n");
-       /* We know win_2 is always overlapped with win_0 and win_1. */
-       if (tegra_dc_windows_is_overlapped(wins[0], wins[1]))
-               return wins[0]->new_bandwidth + wins[1]->new_bandwidth +
-                       wins[2]->new_bandwidth;
-       else
-               return max(wins[0]->new_bandwidth, wins[1]->new_bandwidth) +
-                       wins[2]->new_bandwidth;
+       overlap_count = 0;
+       for (i = 0; i < n; i++) {
+               unsigned int bw1;
+
+               if (wins[i] == NULL)
+                       continue;
+               bw1 = wins[i]->new_bandwidth;
+               if (bw1 > max_bw)
+                       /* Single window */
+                       max_bw = bw1;
+
+               for (j = i + 1; j < n; j++) {
+                       if (wins[j] == NULL)
+                               continue;
+                       if (tegra_dc_windows_is_overlapped(wins[i], wins[j])) {
+                               unsigned int bw2 = wins[j]->new_bandwidth;
+                               if (bw1 + bw2 > max_bw)
+                                       /* Window pair overlaps */
+                                       max_bw = bw1 + bw2;
+                               overlap_count++;
+                       }
+               }
+       }
 
+       if (overlap_count == 3)
+               /* All three windows overlap */
+               max_bw = wins[0]->new_bandwidth + wins[1]->new_bandwidth +
+                        wins[2]->new_bandwidth;
+
+       return max_bw;
 }
 
 /*
@@ -708,17 +743,26 @@ static unsigned long tegra_dc_calc_win_bandwidth(struct tegra_dc *dc,
 {
        unsigned long ret;
        int tiled_windows_bw_multiplier;
+       unsigned long bpp;
 
        if (!WIN_IS_ENABLED(w))
                return 0;
 
+       if (w->w == 0 || w->h == 0 || w->out_w == 0 || w->out_h == 0)
+               return 0;
+
        tiled_windows_bw_multiplier =
                tegra_mc_get_tiled_memory_bandwidth_multiplier();
 
+       /* all of tegra's YUV formats(420 and 422) fetch 2 bytes per pixel,
+        * but the size reported by tegra_dc_fmt_bpp for the planar version
+        * is of the luma plane's size only. */
+       bpp = tegra_dc_is_yuv_planar(w->fmt) ?
+               2 * tegra_dc_fmt_bpp(w->fmt) : tegra_dc_fmt_bpp(w->fmt);
        /* perform calculations with most significant bits of pixel clock
         * to prevent overflow of long. */
        ret = (unsigned long)(dc->pixel_clk >> 16) *
-               (tegra_dc_fmt_bpp(w->fmt) / 8) *
+               bpp / 8 *
                (WIN_USE_V_FILTER(w) ? 2 : 1) * w->w / w->out_w *
                (WIN_IS_TILED(w) ? tiled_windows_bw_multiplier : 1);
 
@@ -740,19 +784,15 @@ static unsigned long tegra_dc_calc_win_bandwidth(struct tegra_dc *dc,
 unsigned long tegra_dc_get_bandwidth(struct tegra_dc_win *windows[], int n)
 {
        int i;
-       struct tegra_dc *dc;
 
-       if (windows[0] == NULL)
-               return tegra_dc_get_default_emc_clk_rate(dc);
-
-       dc = windows[0]->dc;
        BUG_ON(n > DC_N_WINDOWS);
+
        /* emc rate and latency allowance both need to know per window
         * bandwidths */
        for (i = 0; i < n; i++) {
                struct tegra_dc_win *w = windows[i];
                if (w)
-                       w->new_bandwidth = tegra_dc_calc_win_bandwidth(dc, w);
+                       w->new_bandwidth = tegra_dc_calc_win_bandwidth(w->dc, w);
        }
 
        return tegra_dc_find_max_bandwidth(windows, n);
@@ -788,13 +828,8 @@ static int tegra_dc_set_dynamic_emc(struct tegra_dc_win *windows[], int n)
        new_rate = tegra_dc_get_bandwidth(windows, n);
        new_rate = EMC_BW_TO_FREQ(new_rate);
 
-       WARN_ONCE(new_rate > tegra_dc_get_default_emc_clk_rate(dc),
-               "Calculated EMC bandwidth is %luHz, "
-               "maximum allowed EMC bandwidth is %luHz\n",
-               new_rate, tegra_dc_get_default_emc_clk_rate(dc));
-
        if (tegra_dc_has_multiple_dc())
-               new_rate = tegra_dc_get_default_emc_clk_rate(dc);
+               new_rate = ULONG_MAX;
 
        dc->new_emc_clk_rate = new_rate;
 
@@ -1274,6 +1309,10 @@ static int tegra_dc_program_mode(struct tegra_dc *dc, struct tegra_dc_mode *mode
 
        print_mode(dc, mode, __func__);
 
+       /* use default EMC rate when switching modes */
+       dc->new_emc_clk_rate = tegra_dc_get_default_emc_clk_rate(dc);
+       tegra_dc_program_bandwidth(dc);
+
        tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS);
        tegra_dc_writel(dc, mode->h_ref_to_sync | (mode->v_ref_to_sync << 16),
                        DC_DISP_REF_TO_SYNC);
@@ -2002,6 +2041,8 @@ void tegra_dc_enable(struct tegra_dc *dc)
 
 static void _tegra_dc_controller_disable(struct tegra_dc *dc)
 {
+       unsigned i;
+
        disable_irq(dc->irq);
 
        if (dc->out_ops && dc->out_ops->disable)
@@ -2011,6 +2052,12 @@ static void _tegra_dc_controller_disable(struct tegra_dc *dc)
        clk_disable(dc->clk);
        tegra_dvfs_set_rate(dc->clk, 0);
 
+       for (i = 0; i < DC_N_WINDOWS; i++) {
+               struct tegra_dc_win *w = &dc->windows[i];
+               w->bandwidth = 0;
+               w->new_bandwidth = 0;
+       }
+
        if (dc->out && dc->out->disable)
                dc->out->disable();
 
index a1b434c..ebd339c 100644 (file)
@@ -193,6 +193,7 @@ void tegra_dc_stats_enable(struct tegra_dc *dc, bool enable);
 bool tegra_dc_stats_get(struct tegra_dc *dc);
 
 /* defined in dc.c, used by overlay.c */
+unsigned int tegra_dc_has_multiple_dc(void);
 unsigned long tegra_dc_get_bandwidth(struct tegra_dc_win *wins[], int n);
 
 /* defined in dc.c, used by dc_sysfs.c */
index 1db1cb4..7879100 100644 (file)
@@ -405,7 +405,7 @@ surf_err:
 
 static void tegra_overlay_set_emc_freq(struct tegra_overlay_info *dev)
 {
-       unsigned long emc_freq = 0;
+       unsigned long new_rate;
        int i;
        struct tegra_dc_win *win;
        struct tegra_dc_win *wins[DC_N_WINDOWS];
@@ -415,19 +415,13 @@ static void tegra_overlay_set_emc_freq(struct tegra_overlay_info *dev)
                wins[i] = win;
        }
 
-       emc_freq = tegra_dc_get_bandwidth(wins, dev->dc->n_windows);
+       new_rate = tegra_dc_get_bandwidth(wins, dev->dc->n_windows);
+       new_rate = EMC_BW_TO_FREQ(new_rate);
 
-       if (emc_freq  > tegra_dc_get_default_emc_clk_rate(dev->dc)) {
-               WARN_ONCE(emc_freq > tegra_dc_get_default_emc_clk_rate(dev->dc),
-                               "Overlay: calculated EMC bandwidth is %luHz greater "
-                               "than maximum allowed %luHz. Setting to max.\n",
-                               emc_freq,
-                               tegra_dc_get_default_emc_clk_rate(dev->dc));
+       if (tegra_dc_has_multiple_dc())
+               new_rate = ULONG_MAX;
 
-               emc_freq = tegra_dc_get_default_emc_clk_rate(dev->dc);
-       }
-
-       clk_set_rate(dev->dc->emc_clk, emc_freq);
+       clk_set_rate(dev->dc->emc_clk, new_rate);
 }
 
 /* Overlay functions */