b98ced801f7cf5497355ebd37634d3010136c3e1
[linux-3.10.git] / drivers / video / tegra / dc / bandwidth.c
1 /*
2  * drivers/video/tegra/dc/bandwidth.c
3  *
4  * Copyright (c) 2010-2014, NVIDIA CORPORATION, All rights reserved.
5  *
6  * Author: Jon Mayo <jmayo@nvidia.com>
7  *
8  * This software is licensed under the terms of the GNU General Public
9  * License version 2, as published by the Free Software Foundation, and
10  * may be copied, distributed, and modified under those terms.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  */
18
19 #include <linux/module.h>
20 #include <linux/kernel.h>
21 #include <linux/clk.h>
22 #include <linux/clk/tegra.h>
23
24 #include <mach/dc.h>
25 #include <mach/fb.h>
26 #include <mach/mc.h>
27 #include <linux/nvhost.h>
28 #include <mach/latency_allowance.h>
29 #include <trace/events/display.h>
30
31 #include "dc_reg.h"
32 #include "dc_config.h"
33 #include "dc_priv.h"
34
/*
 * Runtime switch for the dynamic EMC bandwidth calculation.  When it is zero,
 * tegra_dc_set_dynamic_emc() returns 0 right away without recomputing the
 * bandwidth.  Exposed as a module parameter with S_IRUGO | S_IWUSR
 * permissions.
 */
static int use_dynamic_emc = 1;

module_param_named(use_dynamic_emc, use_dynamic_emc, int, S_IRUGO | S_IWUSR);
38
#ifdef CONFIG_ARCH_TEGRA_12x_SOC
/*
 * Most recently computed total bandwidth of each display head, indexed by
 * dc->ndev->id.  calc_disp_params() updates and reads the table while holding
 * tegra_dcs_total_bw_lock.
 */
static unsigned int tegra_dcs_total_bw[TEGRA_MAX_DC] = {0};
DEFINE_MUTEX(tegra_dcs_total_bw_lock);
#endif
43
/*
 * Latency allowance client id of every window, indexed as
 * la_id_tab[display head id][window index].  Windows A, B and C are always
 * listed for the first and second display; further entries depend on the SoC
 * selected at build time.
 * NOTE(review): on some configurations the second display row lists fewer
 * entries than the first, so the remaining slots are implicitly zero --
 * confirm that those slots are never looked up as a real client.
 */
static const enum tegra_la_id la_id_tab[2][DC_N_WINDOWS] = {
        /* first display */
        {
                TEGRA_LA_DISPLAY_0A,
                TEGRA_LA_DISPLAY_0B,
                TEGRA_LA_DISPLAY_0C,
#if defined(CONFIG_ARCH_TEGRA_14x_SOC) || defined(CONFIG_ARCH_TEGRA_12x_SOC)
                TEGRA_LA_DISPLAYD,
#endif
#if defined(CONFIG_ARCH_TEGRA_14x_SOC)
                TEGRA_LA_DISPLAY_HC,
#endif
#if defined(CONFIG_ARCH_TEGRA_12x_SOC)
                TEGRA_LA_DISPLAY_T,
#endif
        },
        /* second display */
        {
                TEGRA_LA_DISPLAY_0AB,
                TEGRA_LA_DISPLAY_0BB,
                TEGRA_LA_DISPLAY_0CB,
#if defined(CONFIG_ARCH_TEGRA_14x_SOC)
                /* placeholder so that HCB lines up with HC in the first row */
                0,
                TEGRA_LA_DISPLAY_HCB,
#endif
        },
};
72
73 #ifdef CONFIG_ARCH_TEGRA_12x_SOC
74 static bool is_internal_win(enum tegra_la_id id)
75 {
76         return ((id == TEGRA_LA_DISPLAY_0A) || (id == TEGRA_LA_DISPLAY_0B) ||
77                 (id == TEGRA_LA_DISPLAY_0C) || (id == TEGRA_LA_DISPLAYD) ||
78                 (id == TEGRA_LA_DISPLAY_HC) || (id == TEGRA_LA_DISPLAY_T));
79 }
80
81 static unsigned int num_active_internal_wins(struct tegra_dc *dc)
82 {
83         unsigned int num_active_internal_wins = 0;
84         int i = 0;
85
86         for_each_set_bit(i, &dc->valid_windows, DC_N_WINDOWS) {
87                 struct tegra_dc_win *curr_win = &dc->windows[i];
88                 enum tegra_la_id curr_win_la_id =
89                                 la_id_tab[dc->ndev->id][curr_win->idx];
90
91                 if (!is_internal_win(curr_win_la_id))
92                         continue;
93
94                 if (WIN_IS_ENABLED(curr_win))
95                         num_active_internal_wins++;
96         }
97
98         return num_active_internal_wins;
99 }
100
101 static unsigned int num_active_external_wins(struct tegra_dc *dc)
102 {
103         unsigned int num_active_external_wins = 0;
104         int i = 0;
105
106         for_each_set_bit(i, &dc->valid_windows, DC_N_WINDOWS) {
107                 struct tegra_dc_win *curr_win = &dc->windows[i];
108                 enum tegra_la_id curr_win_la_id =
109                                 la_id_tab[dc->ndev->id][curr_win->idx];
110
111                 if (is_internal_win(curr_win_la_id))
112                         continue;
113
114                 if (WIN_IS_ENABLED(curr_win))
115                         num_active_external_wins++;
116         }
117
118         return num_active_external_wins;
119 }
120
/*
 * Note about fixed point arithmetic:
 * ----------------------------------
 * calc_disp_params(...) needs fractional values, so it works with fixed point
 * values and arithmetic in place of floating point. All fixed point values
 * have the "_fp" or "_FP" suffix in their name. Functions/values used to
 * convert between real and fixed point values are listed below:
 *    - la_params.fp_factor
 *    - la_params.la_real_to_fp(real_val)
 *    - la_params.la_fp_to_real(fp_val)
 */

/*
 * Constants consumed by calc_disp_params().
 * T12X_LA_ROW_SRT_SZ_BYTES refers to T12X_LA_MC_EMEM_NUM_SLOTS, which is
 * defined one line after it; this is fine because the macro is only expanded
 * where it is used.
 */
#define T12X_LA_BW_DISRUPTION_TIME_EMCCLKS_FP                   1342000
#define T12X_LA_STATIC_LA_SNAP_ARB_TO_ROW_SRT_EMCCLKS_FP        54000
#define T12X_LA_CONS_MEM_EFFICIENCY_FP                          500
#define T12X_LA_ROW_SRT_SZ_BYTES        (64 * (T12X_LA_MC_EMEM_NUM_SLOTS + 1))
#define T12X_LA_MC_EMEM_NUM_SLOTS                               63
#define T12X_LA_MAX_DRAIN_TIME_USEC                             10
139
140 /*
141  * Function outputs:
142  *    - disp_params->thresh_lwm_bytes
143  *    - disp_params->spool_up_buffering_adj_bytes
144  *    - disp_params->total_dc0_bw
145  *    - disp_params->total_dc1_bw
146  */
147 static void calc_disp_params(struct tegra_dc *dc,
148                                 struct tegra_dc_win *w,
149                                 enum tegra_la_id la_id,
150                                 unsigned int bw_mbps,
151                                 struct dc_to_la_params *disp_params) {
152         const struct disp_client *disp_clients_info =
153                                                 tegra_la_disp_clients_info;
154         struct la_to_dc_params la_params = tegra_get_la_to_dc_params();
155         unsigned int bw_mbps_fp = la_params.la_real_to_fp(bw_mbps);
156         bool active = WIN_IS_ENABLED(w);
157         bool win_rotated = false;
158         unsigned int window_width = dfixed_trunc(w->w);
159         unsigned int surface_width = 0;
160         bool vertical_scaling_enabled = false;
161         bool pitch = !WIN_IS_BLOCKLINEAR(w) && !WIN_IS_TILED(w);
162         bool planar = tegra_dc_is_yuv_planar(w->fmt);
163         bool packed_yuv422 =
164                         ((tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YCbCr422) ||
165                         (tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YUV422));
166         /* all of tegra's YUV formats(420 and 422) fetch 2 bytes per pixel,
167          * but the size reported by tegra_dc_fmt_bpp for the planar version
168          * is of the luma plane's size only. */
169         unsigned int bytes_per_pixel = tegra_dc_is_yuv_planar(w->fmt) ?
170                                 2 * tegra_dc_fmt_bpp(w->fmt) / 8 :
171                                 tegra_dc_fmt_bpp(w->fmt) / 8;
172         struct tegra_dc_mode mode = dc->mode;
173         unsigned int total_h = mode.h_active +
174                                 mode.h_front_porch +
175                                 mode.h_back_porch +
176                                 mode.h_sync_width;
177         unsigned int total_v = mode.v_active +
178                                 mode.v_front_porch +
179                                 mode.v_back_porch +
180                                 mode.v_sync_width;
181         unsigned int total_screen_area = total_h * total_v;
182         unsigned int total_active_area = mode.h_active * mode.v_active;
183         unsigned int total_blank_area = total_screen_area - total_active_area;
184         unsigned int c1_fp = 0;
185         unsigned int c2 = 0;
186         unsigned int c3 = 0;
187         unsigned int bpp_for_line_buffer_storage_fp = 0;
188         unsigned int reqd_buffering_thresh_disp_bytes_fp = 0;
189         unsigned int latency_buffering_available_in_reqd_buffering_fp = 0;
190         struct clk *emc_clk = clk_get(NULL, "emc");
191         unsigned long emc_freq_mhz = clk_get_rate(emc_clk)/1000000;
192         unsigned int bw_disruption_time_usec_fp =
193                                         T12X_LA_BW_DISRUPTION_TIME_EMCCLKS_FP /
194                                         emc_freq_mhz;
195         unsigned int effective_row_srt_sz_bytes_fp =
196                 min((unsigned long)la_params.la_real_to_fp(min(
197                                         (unsigned long)T12X_LA_ROW_SRT_SZ_BYTES,
198                                         16 * min(emc_freq_mhz + 50,
199                                                 400ul))),
200                         ((T12X_LA_MAX_DRAIN_TIME_USEC *
201                         emc_freq_mhz -
202                         la_params.la_fp_to_real(
203                         T12X_LA_STATIC_LA_SNAP_ARB_TO_ROW_SRT_EMCCLKS_FP) *
204                         2 *
205                         la_params.dram_width_bits /
206                         8 *
207                         T12X_LA_CONS_MEM_EFFICIENCY_FP)));
208         unsigned int drain_time_usec_fp =
209                         effective_row_srt_sz_bytes_fp *
210                         la_params.fp_factor /
211                         (emc_freq_mhz *
212                                 la_params.dram_width_bits /
213                                 4 *
214                                 T12X_LA_CONS_MEM_EFFICIENCY_FP) +
215                         T12X_LA_STATIC_LA_SNAP_ARB_TO_ROW_SRT_EMCCLKS_FP /
216                         emc_freq_mhz;
217         unsigned int total_latency_usec_fp =
218                 drain_time_usec_fp +
219                 la_params.static_la_minus_snap_arb_to_row_srt_emcclks_fp /
220                 emc_freq_mhz;
221         unsigned int bw_disruption_buffering_bytes_fp =
222                                         bw_mbps *
223                                         max(bw_disruption_time_usec_fp,
224                                                 total_latency_usec_fp) +
225                                         la_params.la_real_to_fp(1)/2;
226         unsigned int reqd_lines = 0;
227         unsigned int lines_of_latency = 0;
228         unsigned int thresh_lwm_bytes = 0;
229         unsigned int total_buf_sz_bytes =
230         disp_clients_info[DISP_CLIENT_LA_ID(la_id)].line_buf_sz_bytes +
231         disp_clients_info[DISP_CLIENT_LA_ID(la_id)].mccif_size_bytes;
232         unsigned int num_active_wins_to_use = is_internal_win(la_id) ?
233                                                 num_active_internal_wins(dc) :
234                                                 num_active_external_wins(dc);
235         unsigned int total_active_space_bw = 0;
236         unsigned int total_vblank_bw = 0;
237         unsigned int bw_other_wins = 0;
238         unsigned int bw_display_fp = 0;
239         unsigned int bw_delta_fp = 0;
240         unsigned int fill_rate_other_wins_fp = 0;
241         unsigned int dvfs_time_nsec = tegra_get_dvfs_time_nsec(emc_freq_mhz);
242         unsigned int data_shortfall_other_wins_fp = 0;
243         unsigned int duration_usec_fp = 0;
244         unsigned int spool_up_buffering_adj_bytes = 0;
245         unsigned int curr_dc_head_bw = 0;
246
247         if (w->flags & TEGRA_WIN_FLAG_SCAN_COLUMN) {
248                 win_rotated = true;
249                 surface_width = dfixed_trunc(w->h);
250                 vertical_scaling_enabled = (dfixed_trunc(w->w) == w->out_w) ?
251                                         false : true;
252         } else {
253                 win_rotated = false;
254                 surface_width = dfixed_trunc(w->w);
255                 vertical_scaling_enabled = (dfixed_trunc(w->h) == w->out_h) ?
256                                         false : true;
257         }
258
259         if ((disp_clients_info[DISP_CLIENT_LA_ID(la_id)].line_buf_sz_bytes
260                                                                         == 0) ||
261                 (pitch == true)) {
262                 reqd_lines = 0;
263         } else if (win_rotated && planar) {
264                 if (vertical_scaling_enabled)
265                         reqd_lines = 17;
266                 else
267                         reqd_lines = 16;
268         } else {
269                 if (win_rotated) {
270                         if (vertical_scaling_enabled)
271                                 reqd_lines = 16 / bytes_per_pixel + 1;
272                         else
273                                 reqd_lines = 16 / bytes_per_pixel;
274                 } else {
275                         if (vertical_scaling_enabled)
276                                 reqd_lines = 3;
277                         else
278                                 reqd_lines = 1;
279                 }
280         }
281
282         if (reqd_lines > 0 && !vertical_scaling_enabled && win_rotated)
283                 lines_of_latency = 1;
284         else
285                 lines_of_latency = 0;
286
287
288         if (((tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YCbCr422R) ||
289                 (tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YUV422R) ||
290                 (tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YCbCr422RA) ||
291                 (tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YUV422RA)) &&
292                 !win_rotated) {
293                 c1_fp = la_params.la_real_to_fp(5) / 2;
294         } else {
295                 c1_fp = la_params.la_real_to_fp(1);
296         }
297
298         if ((((tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YCbCr420P) ||
299                 (tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YUV420P) ||
300                 (tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YCrCb420SP) ||
301                 (tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YCbCr420SP) ||
302                 (tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YVU420SP) ||
303                 (tegra_dc_fmt(w->fmt) == TEGRA_WIN_FMT_YUV420SP)) &&
304                 !win_rotated) ||
305                 (tegra_dc_is_yuv(w->fmt) && win_rotated)) {
306                 c2 = 3;
307         } else {
308                 c2 = bytes_per_pixel;
309         }
310
311         c3 = (packed_yuv422 && win_rotated) ? 2 : 1;
312         latency_buffering_available_in_reqd_buffering_fp = active *
313                                                         surface_width *
314                                                         lines_of_latency *
315                                                         c1_fp *
316                                                         c2 *
317                                                         c3;
318
319         switch (tegra_dc_fmt(w->fmt)) {
320         /* YUV 420 case*/
321         case TEGRA_WIN_FMT_YCbCr420P:
322         case TEGRA_WIN_FMT_YUV420P:
323         case TEGRA_WIN_FMT_YCrCb420SP:
324         case TEGRA_WIN_FMT_YCbCr420SP:
325         case TEGRA_WIN_FMT_YVU420SP:
326         case TEGRA_WIN_FMT_YUV420SP:
327                 c1_fp = (win_rotated) ?
328                         la_params.la_real_to_fp(2) :
329                         la_params.la_real_to_fp(3);
330                 break;
331
332         /* YUV 422 case */
333         case TEGRA_WIN_FMT_YCbCr422:
334         case TEGRA_WIN_FMT_YUV422:
335         case TEGRA_WIN_FMT_YCbCr422P:
336         case TEGRA_WIN_FMT_YUV422P:
337         case TEGRA_WIN_FMT_YCrCb422SP:
338         case TEGRA_WIN_FMT_YCbCr422SP:
339         case TEGRA_WIN_FMT_YVU422SP:
340         case TEGRA_WIN_FMT_YUV422SP:
341                 c1_fp = (win_rotated) ?
342                         la_params.la_real_to_fp(3) :
343                         la_params.la_real_to_fp(2);
344                 break;
345
346         /* YUV 422R case */
347         case TEGRA_WIN_FMT_YCbCr422R:
348         case TEGRA_WIN_FMT_YUV422R:
349         case TEGRA_WIN_FMT_YCbCr422RA:
350         case TEGRA_WIN_FMT_YUV422RA:
351                 c1_fp = (win_rotated) ?
352                         la_params.la_real_to_fp(2) :
353                         la_params.la_real_to_fp(5);
354                 break;
355
356         /* YUV 444 case */
357         case TEGRA_WIN_FMT_YCbCr444P:
358         case TEGRA_WIN_FMT_YUV444P:
359         case TEGRA_WIN_FMT_YVU444SP:
360         case TEGRA_WIN_FMT_YUV444SP:
361                 c1_fp = la_params.la_real_to_fp(3);
362                 break;
363
364         default:
365                 c1_fp = la_params.la_real_to_fp(bytes_per_pixel);
366                 break;
367         }
368
369         c2 = (packed_yuv422 && win_rotated) ? 2 : 1;
370         bpp_for_line_buffer_storage_fp = c1_fp * c2;
371         reqd_buffering_thresh_disp_bytes_fp = active *
372                                                 surface_width *
373                                                 reqd_lines *
374                                                 bpp_for_line_buffer_storage_fp;
375         thresh_lwm_bytes =
376                 la_params.la_fp_to_real(
377                         reqd_buffering_thresh_disp_bytes_fp +
378                         bw_disruption_buffering_bytes_fp -
379                         latency_buffering_available_in_reqd_buffering_fp);
380         disp_params->thresh_lwm_bytes = thresh_lwm_bytes;
381
382
383         if (is_internal_win(la_id)) {
384                 int i = 0;
385
386                 for_each_set_bit(i, &dc->valid_windows, DC_N_WINDOWS) {
387                         struct tegra_dc_win *curr_win = &dc->windows[i];
388                         enum tegra_la_id curr_win_la_id =
389                                         la_id_tab[dc->ndev->id][curr_win->idx];
390                         unsigned int curr_win_bw = 0;
391
392                         if (!is_internal_win(curr_win_la_id))
393                                 continue;
394
395                         curr_win_bw = max(curr_win->bandwidth,
396                                                 curr_win->new_bandwidth);
397                         /* our bandwidth is in kbytes/sec, but LA takes MBps.
398                          * round up bandwidth to next 1MBps */
399                         if (curr_win_bw != UINT_MAX)
400                                 curr_win_bw = curr_win_bw / 1000 + 1;
401
402                         total_active_space_bw += curr_win_bw;
403                 }
404         } else {
405                 int i = 0;
406
407                 for_each_set_bit(i, &dc->valid_windows, DC_N_WINDOWS) {
408                         struct tegra_dc_win *curr_win = &dc->windows[i];
409                         enum tegra_la_id curr_win_la_id =
410                                         la_id_tab[dc->ndev->id][curr_win->idx];
411                         unsigned int curr_win_bw = 0;
412
413                         if (is_internal_win(curr_win_la_id))
414                                 continue;
415
416                         curr_win_bw = max(curr_win->bandwidth,
417                                                 curr_win->new_bandwidth);
418                         /* our bandwidth is in kbytes/sec, but LA takes MBps.
419                          * round up bandwidth to next 1MBps */
420                         if (curr_win_bw != UINT_MAX)
421                                 curr_win_bw = curr_win_bw / 1000 + 1;
422
423                         total_active_space_bw += curr_win_bw;
424                 }
425         }
426
427
428         if ((disp_clients_info[DISP_CLIENT_LA_ID(la_id)].win_type  ==
429                                                 TEGRA_LA_DISP_WIN_TYPE_FULL) ||
430                 (disp_clients_info[DISP_CLIENT_LA_ID(la_id)].win_type  ==
431                                                 TEGRA_LA_DISP_WIN_TYPE_FULLA) ||
432                 (disp_clients_info[DISP_CLIENT_LA_ID(la_id)].win_type  ==
433                                                 TEGRA_LA_DISP_WIN_TYPE_FULLB)) {
434                 total_vblank_bw = total_buf_sz_bytes / total_blank_area;
435         } else {
436                 total_vblank_bw = 0;
437         }
438
439
440         bw_display_fp = la_params.disp_catchup_factor_fp *
441                         max(total_active_space_bw,
442                                 total_vblank_bw);
443         if (active)
444                 bw_delta_fp = bw_mbps_fp -
445                                 (bw_display_fp /
446                                 num_active_wins_to_use);
447
448
449         bw_other_wins = total_active_space_bw - bw_mbps;
450
451         if (num_active_wins_to_use > 0) {
452                 fill_rate_other_wins_fp =
453                                 bw_display_fp *
454                                 (num_active_wins_to_use - active) /
455                                 num_active_wins_to_use -
456                                 la_params.la_real_to_fp(bw_other_wins);
457         } else {
458                 fill_rate_other_wins_fp = 0;
459         }
460
461         data_shortfall_other_wins_fp = dvfs_time_nsec *
462                                         bw_other_wins *
463                                         la_params.fp_factor /
464                                         1000;
465
466         duration_usec_fp = (fill_rate_other_wins_fp == 0) ? 0 :
467                                 data_shortfall_other_wins_fp *
468                                 la_params.fp_factor /
469                                 fill_rate_other_wins_fp;
470
471
472         spool_up_buffering_adj_bytes = (bw_delta_fp > 0) ?
473                                         (bw_delta_fp *
474                                         duration_usec_fp /
475                                         (la_params.fp_factor *
476                                         la_params.fp_factor)) :
477                                         0;
478         disp_params->spool_up_buffering_adj_bytes =
479                                                 spool_up_buffering_adj_bytes;
480
481         mutex_lock(&tegra_dcs_total_bw_lock);
482         curr_dc_head_bw = max(dc->new_bw_kbps, dc->bw_kbps);
483         /* our bandwidth is in kbytes/sec, but LA takes MBps.
484          * round up bandwidth to next 1MBps */
485         if (curr_dc_head_bw != ULONG_MAX)
486                 curr_dc_head_bw = curr_dc_head_bw / 1000 + 1;
487         tegra_dcs_total_bw[dc->ndev->id] = curr_dc_head_bw;
488         disp_params->total_dc0_bw = tegra_dcs_total_bw[0];
489         disp_params->total_dc1_bw = tegra_dcs_total_bw[1];
490         mutex_unlock(&tegra_dcs_total_bw_lock);
491 }
492 #endif
493
494
495 /* uses the larger of w->bandwidth or w->new_bandwidth */
496 static void tegra_dc_set_latency_allowance(struct tegra_dc *dc,
497         struct tegra_dc_win *w)
498 {
499         unsigned long bw;
500         struct dc_to_la_params disp_params;
501 #if defined(CONFIG_ARCH_TEGRA_2x_SOC) || defined(CONFIG_ARCH_TEGRA_3x_SOC)
502         /* window B V-filter tap for first and second display. */
503         static const enum tegra_la_id vfilter_tab[2] = {
504                 TEGRA_LA_DISPLAY_1B, TEGRA_LA_DISPLAY_1BB,
505         };
506 #endif
507
508         BUG_ON(dc->ndev->id >= ARRAY_SIZE(la_id_tab));
509 #if defined(CONFIG_ARCH_TEGRA_2x_SOC) || defined(CONFIG_ARCH_TEGRA_3x_SOC)
510         BUG_ON(dc->ndev->id >= ARRAY_SIZE(vfilter_tab));
511 #endif
512         BUG_ON(w->idx >= ARRAY_SIZE(*la_id_tab));
513
514         bw = max(w->bandwidth, w->new_bandwidth);
515
516 #if defined(CONFIG_ARCH_TEGRA_2x_SOC) || defined(CONFIG_ARCH_TEGRA_3x_SOC)
517         /* tegra_dc_get_bandwidth() treats V filter windows as double
518          * bandwidth, but LA has a seperate client for V filter */
519         if (w->idx == 1 && win_use_v_filter(dc, w))
520                 bw /= 2;
521 #endif
522
523         /* our bandwidth is in kbytes/sec, but LA takes MBps.
524          * round up bandwidth to next 1MBps */
525         if (bw != ULONG_MAX)
526                 bw = bw / 1000 + 1;
527
528 #ifdef CONFIG_ARCH_TEGRA_12x_SOC
529         calc_disp_params(dc,
530                         w,
531                         la_id_tab[dc->ndev->id][w->idx],
532                         bw,
533                         &disp_params);
534 #endif
535         tegra_set_disp_latency_allowance(la_id_tab[dc->ndev->id][w->idx],
536                                                 bw,
537                                                 disp_params);
538 #if defined(CONFIG_ARCH_TEGRA_2x_SOC) || defined(CONFIG_ARCH_TEGRA_3x_SOC)
539         /* if window B, also set the 1B client for the 2-tap V filter. */
540         if (w->idx == 1)
541                 tegra_set_latency_allowance(vfilter_tab[dc->ndev->id], bw);
542 #endif
543 }
544
545 static int tegra_dc_windows_is_overlapped(struct tegra_dc_win *a,
546         struct tegra_dc_win *b)
547 {
548         if (a == b)
549                 return 0;
550
551         if (!WIN_IS_ENABLED(a) || !WIN_IS_ENABLED(b))
552                 return 0;
553
554         /* because memory access to load the fifo can overlap, only care
555          * if windows overlap vertically */
556         return ((a->out_y + a->out_h > b->out_y) && (a->out_y <= b->out_y)) ||
557                 ((b->out_y + b->out_h > a->out_y) && (b->out_y <= a->out_y));
558 }
559
560 /* check overlapping window combinations to find the max bandwidth. */
561 static unsigned long tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[],
562                                                  unsigned n)
563 {
564         unsigned i;
565         unsigned j;
566         unsigned long bw;
567         unsigned long max = 0;
568
569         for (i = 0; i < n; i++) {
570                 bw = wins[i]->new_bandwidth;
571                 for (j = 0; j < n; j++)
572                         if (tegra_dc_windows_is_overlapped(wins[i], wins[j]))
573                                 bw += wins[j]->new_bandwidth;
574                 if (max < bw)
575                         max = bw;
576         }
577         return max;
578 }
579
580 /*
581  * Calculate peak EMC bandwidth for each enabled window =
582  * pixel_clock * win_bpp * (use_v_filter ? 2 : 1)) * H_scale_factor *
583  * (windows_tiling ? 2 : 1)
584  *
585  * note:
586  * (*) We use 2 tap V filter on T2x/T3x, so need double BW if use V filter
587  * (*) Tiling mode on T30 and DDR3 requires double BW
588  *
589  * return:
590  * bandwidth in kBps
591  */
592 static unsigned long tegra_dc_calc_win_bandwidth(struct tegra_dc *dc,
593         struct tegra_dc_win *w)
594 {
595         unsigned long ret;
596         int tiled_windows_bw_multiplier;
597         unsigned long bpp;
598         unsigned in_w;
599
600         if (!WIN_IS_ENABLED(w))
601                 return 0;
602
603         if (dfixed_trunc(w->w) == 0 || dfixed_trunc(w->h) == 0 ||
604             w->out_w == 0 || w->out_h == 0)
605                 return 0;
606         if (w->flags & TEGRA_WIN_FLAG_SCAN_COLUMN)
607                 /* rotated: PRESCALE_SIZE swapped, but WIN_SIZE is unchanged */
608                 in_w = dfixed_trunc(w->h);
609         else
610                 in_w = dfixed_trunc(w->w); /* normal output, not rotated */
611
612         tiled_windows_bw_multiplier =
613                 tegra_mc_get_tiled_memory_bandwidth_multiplier();
614
615         /* all of tegra's YUV formats(420 and 422) fetch 2 bytes per pixel,
616          * but the size reported by tegra_dc_fmt_bpp for the planar version
617          * is of the luma plane's size only. */
618         bpp = tegra_dc_is_yuv_planar(w->fmt) ?
619                 2 * tegra_dc_fmt_bpp(w->fmt) : tegra_dc_fmt_bpp(w->fmt);
620         ret = dc->mode.pclk / 1000UL * bpp / 8 *
621 #if defined(CONFIG_ARCH_TEGRA_2x_SOC) || defined(CONFIG_ARCH_TEGRA_3x_SOC)
622                 (win_use_v_filter(dc, w) ? 2 : 1) *
623 #endif
624                 in_w / w->out_w * (WIN_IS_TILED(w) ?
625                 tiled_windows_bw_multiplier : 1);
626 #ifdef CONFIG_ARCH_TEGRA_2x_SOC
627         /*
628          * Assuming 60% efficiency: i.e. if we calculate we need 70MBps, we
629          * will request 117MBps from EMC.
630          */
631         ret = ret + (17 * ret / 25);
632 #endif
633         return ret;
634 }
635
636 static unsigned long tegra_dc_get_bandwidth(
637         struct tegra_dc_win *windows[], int n)
638 {
639         int i;
640
641         BUG_ON(n > DC_N_WINDOWS);
642
643         /* emc rate and latency allowance both need to know per window
644          * bandwidths */
645         for (i = 0; i < n; i++) {
646                 struct tegra_dc_win *w = windows[i];
647
648                 if (w)
649                         w->new_bandwidth =
650                                 tegra_dc_calc_win_bandwidth(w->dc, w);
651         }
652
653         return tegra_dc_find_max_bandwidth(windows, n);
654 }
655
656 #ifdef CONFIG_TEGRA_ISOMGR
657 /* to save power, call when display memory clients would be idle */
658 void tegra_dc_clear_bandwidth(struct tegra_dc *dc)
659 {
660         int latency;
661
662         trace_clear_bandwidth(dc);
663         latency = tegra_isomgr_reserve(dc->isomgr_handle, 0, 1000);
664         if (latency) {
665                 dc->reserved_bw = 0;
666                 latency = tegra_isomgr_realize(dc->isomgr_handle);
667                 WARN_ONCE(!latency, "tegra_isomgr_realize failed\n");
668         } else {
669                 dev_dbg(&dc->ndev->dev, "Failed to clear bw.\n");
670                 tegra_dc_ext_process_bandwidth_renegotiate(
671                                 dc->ndev->id, NULL);
672         }
673         dc->bw_kbps = 0;
674 }
675 #else
676 /* to save power, call when display memory clients would be idle */
677 void tegra_dc_clear_bandwidth(struct tegra_dc *dc)
678 {
679         trace_clear_bandwidth(dc);
680         if (tegra_is_clk_enabled(dc->emc_clk))
681                 clk_disable_unprepare(dc->emc_clk);
682         dc->bw_kbps = 0;
683 }
684
685 /* bw in kByte/second. returns Hz for EMC frequency */
686 static inline unsigned long tegra_dc_kbps_to_emc(unsigned long bw)
687 {
688         unsigned long freq;
689
690         if (bw == ULONG_MAX)
691                 return ULONG_MAX;
692
693         freq = tegra_emc_bw_to_freq_req(bw);
694         if (freq >= (ULONG_MAX / 1000))
695                 return ULONG_MAX; /* freq too big - clamp at max */
696
697         if (WARN_ONCE((freq * 1000) < freq, "Bandwidth Overflow"))
698                 return ULONG_MAX; /* should never occur because of above. */
699         return freq * 1000;
700 }
701 #endif
702
703 /* use the larger of dc->bw_kbps or dc->new_bw_kbps, and copies
704  * dc->new_bw_kbps into dc->bw_kbps.
705  * calling this function both before and after a flip is sufficient to select
706  * the best possible frequency and latency allowance.
707  * set use_new to true to force dc->new_bw_kbps programming.
708  */
709 void tegra_dc_program_bandwidth(struct tegra_dc *dc, bool use_new)
710 {
711         unsigned i;
712
713         if (use_new || dc->bw_kbps != dc->new_bw_kbps) {
714                 long bw = max(dc->bw_kbps, dc->new_bw_kbps);
715
716 #ifdef CONFIG_TEGRA_ISOMGR
717                 int latency;
718
719                 /* reserve atleast the minimum bandwidth. */
720                 bw = max(dc->bw_kbps, tegra_dc_calc_min_bandwidth(dc));
721                 latency = tegra_isomgr_reserve(dc->isomgr_handle, bw, 1000);
722                 if (latency) {
723                         dc->reserved_bw = bw;
724                         latency = tegra_isomgr_realize(dc->isomgr_handle);
725                         WARN_ONCE(!latency, "tegra_isomgr_realize failed\n");
726                 } else {
727                         dev_dbg(&dc->ndev->dev, "Failed to reserve bw %ld.\n",
728                                                                         bw);
729                         tegra_dc_ext_process_bandwidth_renegotiate(
730                                 dc->ndev->id, NULL);
731                 }
732 #else /* EMC version */
733                 int emc_freq;
734
735                 /* going from 0 to non-zero */
736                 if (!dc->bw_kbps && dc->new_bw_kbps &&
737                         !tegra_is_clk_enabled(dc->emc_clk))
738                         clk_prepare_enable(dc->emc_clk);
739
740                 emc_freq = tegra_dc_kbps_to_emc(bw);
741                 clk_set_rate(dc->emc_clk, emc_freq);
742
743                 /* going from non-zero to 0 */
744                 if (dc->bw_kbps && !dc->new_bw_kbps &&
745                         tegra_is_clk_enabled(dc->emc_clk))
746                         clk_disable_unprepare(dc->emc_clk);
747 #endif
748                 dc->bw_kbps = dc->new_bw_kbps;
749         }
750
751         for_each_set_bit(i, &dc->valid_windows, DC_N_WINDOWS) {
752                 struct tegra_dc_win *w = &dc->windows[i];
753
754                 if ((use_new || w->bandwidth != w->new_bandwidth) &&
755                         w->new_bandwidth != 0)
756                         tegra_dc_set_latency_allowance(dc, w);
757                 trace_program_bandwidth(dc);
758                 w->bandwidth = w->new_bandwidth;
759         }
760 }
761
762 int tegra_dc_set_dynamic_emc(struct tegra_dc *dc)
763 {
764         unsigned long new_rate;
765         struct tegra_dc_win *windows[DC_N_WINDOWS];
766         unsigned i;
767         unsigned len;
768         unsigned win_status = 0;
769
770         if (!use_dynamic_emc)
771                 return 0;
772
773         for (i = 0, len = 0; i < DC_N_WINDOWS; i++) {
774                 struct tegra_dc_win *win = tegra_dc_get_window(dc, i);
775                 if (win) {
776                         windows[len++] = win;
777                         if (win->flags && TEGRA_WIN_FLAG_ENABLED)
778                                 win_status |= 1 << i;
779                 }
780         }
781 #ifdef CONFIG_TEGRA_ISOMGR
782         new_rate = tegra_dc_get_bandwidth(windows, len);
783 #else
784         if (tegra_dc_has_multiple_dc())
785                 new_rate = ULONG_MAX;
786         else
787                 new_rate = tegra_dc_get_bandwidth(windows, len);
788 #endif
789
790         dc->new_bw_kbps = new_rate;
791         trace_set_dynamic_emc(dc);
792
793         /* if low_v_win is set, we can lower vdd_core when
794                 that windows is the only one active */
795         if (dc->pdata->low_v_win != 0) {
796                 if (win_status == dc->pdata->low_v_win &&
797                         dc->win_status != dc->pdata->low_v_win) {
798                         tegra_dvfs_use_alt_freqs_on_clk(dc->clk, true);
799                         dc->win_status = dc->pdata->low_v_win;
800                 } else if (win_status != dc->pdata->low_v_win &&
801                         dc->win_status == dc->pdata->low_v_win) {
802                         tegra_dvfs_use_alt_freqs_on_clk(dc->clk, false);
803                         dc->win_status = win_status;
804                 }
805         }
806         return 0;
807 }
808
809 /* return the minimum bandwidth in kbps for display to function */
810 long tegra_dc_calc_min_bandwidth(struct tegra_dc *dc)
811 {
812         unsigned pclk = tegra_dc_get_out_max_pixclock(dc);
813
814         if (WARN_ONCE(!dc, "dc is NULL") ||
815                 WARN_ONCE(!dc->out, "dc->out is NULL!"))
816                 return 0;
817         if (!pclk) {
818                  if (dc->out->type == TEGRA_DC_OUT_HDMI) {
819 #if defined(CONFIG_ARCH_TEGRA_11x_SOC)
820                         pclk = KHZ2PICOS(300000); /* 300MHz max */
821 #else
822                         pclk = KHZ2PICOS(150000); /* 150MHz max */
823 #endif
824                 } else {
825                         pclk = KHZ2PICOS(dc->mode.pclk / 1000);
826                 }
827         }
828
829         return PICOS2KHZ(pclk) * 4; /* support a single 32bpp window */
830 }
831
832 #ifdef CONFIG_TEGRA_ISOMGR
833 int tegra_dc_bandwidth_negotiate_bw(struct tegra_dc *dc,
834                         struct tegra_dc_win *windows[], int n)
835 {
836         int latency;
837         u32 bw;
838
839         mutex_lock(&dc->lock);
840         /*
841          * isomgr will update available bandwidth through a callback.
842          * If available bandwidth is less than proposed bw fail the ioctl.
843          * If proposed bw is larger than reserved bw, make it in effect
844          * immediately. Otherwise, bandwidth will be adjusted in flips.
845          */
846         bw = tegra_dc_get_bandwidth(windows, n);
847         if (bw > dc->available_bw) {
848                 mutex_unlock(&dc->lock);
849                 return -1;
850         } else if (bw <= dc->reserved_bw) {
851                 mutex_unlock(&dc->lock);
852                 return 0;
853         }
854
855         latency = tegra_isomgr_reserve(dc->isomgr_handle, bw, 1000);
856         if (!latency) {
857                 dev_dbg(&dc->ndev->dev, "Failed to reserve proposed bw %d.\n",
858                                                                         bw);
859                 mutex_unlock(&dc->lock);
860                 return -1;
861         }
862
863         dc->reserved_bw = bw;
864         latency = tegra_isomgr_realize(dc->isomgr_handle);
865         if (!latency) {
866                 WARN_ONCE(!latency, "tegra_isomgr_realize failed\n");
867                 mutex_unlock(&dc->lock);
868                 return -1;
869         }
870
871         mutex_unlock(&dc->lock);
872
873         return 0;
874 }
875
876 void tegra_dc_bandwidth_renegotiate(void *p, u32 avail_bw)
877 {
878         struct tegra_dc_bw_data data;
879         struct tegra_dc *dc = p;
880
881         if (dc->available_bw == avail_bw)
882                 return;
883
884         if (WARN_ONCE(!dc, "dc is NULL!"))
885                 return;
886
887         data.total_bw = tegra_isomgr_get_total_iso_bw();
888         data.avail_bw = avail_bw;
889         data.resvd_bw = dc->reserved_bw;
890
891         tegra_dc_ext_process_bandwidth_renegotiate(dc->ndev->id, &data);
892
893         mutex_lock(&dc->lock);
894         dc->available_bw = avail_bw;
895         mutex_unlock(&dc->lock);
896 }
897 #endif