WAR: gr3d: limit 3d clock when camera is on
[linux-2.6.git] / drivers / video / tegra / host / gr3d / scale3d.c
1 /*
 * drivers/video/tegra/host/gr3d/scale3d.c
3  *
4  * Tegra Graphics Host 3D clock scaling
5  *
6  * Copyright (c) 2010-2012, NVIDIA Corporation.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20
/*
 * 3d clock scaling
 *
 * nvhost_scale3d_notify_busy() is called upon submit, and
 * nvhost_scale3d_notify_idle() is called when all outstanding submits are
 * completed. Idle times are measured over a fixed time period
 * (scale3d.p_period). If the 3d module idle time percentage goes over the
 * limit (set in scale3d.p_idle_max), 3d clocks are scaled down. If the
 * percentage goes under the minimum limit (set in scale3d.p_idle_min), 3d
 * clocks are scaled up. An additional test is made over the time frame given
 * in scale3d.p_fast_response for clocking up quickly in response to load
 * peaks.
 *
 * The 3d.emc clock is scaled proportionately to the 3d clock, with a
 * quadratic-bezier-like factor added to pull the 3d.emc rate a bit lower.
 */
36
37 #include <linux/debugfs.h>
38 #include <linux/types.h>
39 #include <linux/clk.h>
40 #include <mach/clk.h>
41 #include <mach/hardware.h>
42 #include "scale3d.h"
43 #include "dev.h"
44 #include <media/tegra_camera.h>
45
/* Forward declarations; both helpers are defined further down in this file. */
static int scale3d_is_enabled(void);
static void scale3d_enable(int enable);

/* Square of x. The argument is expanded twice: pass side-effect-free values. */
#define POW2(x) ((x) * (x))

/*
 * 3D clock scaling must be treated differently when the camera is on in AP37.
 * 3D in AP37 requires 1.3V, and combining it with MPE reaches the EDP limit.
 * The 3D clock therefore has to be set to a lower frequency that only
 * requires 1.0V. The same applies to the 3D EMC clock.
 */
#define CAMERA_3D_CLK 300000000
#define CAMERA_3D_EMC_CLK 437000000
59
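/*
 * debugfs parameters to control 3d clock scaling test
 *
 * period        - time period for clock rate evaluation, in microseconds
 * fast_response - time period for evaluation of 'busy' spikes, in
 *                 microseconds
 * idle_min      - if less than [idle_min] percent idle over [fast_response]
 *                 microseconds, clock up.
 * idle_max      - if over [idle_max] percent idle over [period] microseconds,
 *                 clock down.
 * max_scale     - limits rate changes to no less than (100 - max_scale)% or
 *                 (100 + 2 * max_scale)% of current clock rate
 *                 (NOTE: no matching p_max_scale field or debugfs file is
 *                 defined in this file)
 * adjust        - if non-zero, period, fast_response, idle_min and idle_max
 *                 are tuned at run time from the scale up / scale down hint
 *                 counts; otherwise the values above are used unchanged
 * scale_emc     - if non-zero, the 3d.emc rate is updated whenever the 3d
 *                 rate is changed
 * emc_dip       - if non-zero, the quadratic dip is subtracted from the
 *                 linear 3d.emc rate
 * use_throughput_hint - if non-zero, throughput hints step the 3d clock, and
 *                 idle/busy notifications are ignored while the last hint is
 *                 less than one second old
 * throughput_hi_limit - hints at or above this value step the clock down
 * throughput_lo_limit - hints at or below this value step the clock up
 * scale_step    - clock rate change applied per throughput hint step
 * verbosity     - set to 5 or above for debug printouts, 10 or above to also
 *                 print scaling statistics
 */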
73
74 struct scale3d_info_rec {
75         struct mutex lock; /* lock for timestamps etc */
76         int enable;
77         int init;
78         ktime_t idle_frame;
79         ktime_t fast_frame;
80         ktime_t last_idle;
81         ktime_t last_short_term_idle;
82         int is_idle;
83         ktime_t last_tweak;
84         ktime_t last_down;
85         int fast_up_count;
86         int slow_down_count;
87         int is_scaled;
88         int fast_responses;
89         unsigned long idle_total;
90         unsigned long idle_short_term_total;
91         unsigned long max_rate_3d;
92         long emc_slope;
93         long emc_offset;
94         long emc_dip_slope;
95         long emc_dip_offset;
96         long emc_xmid;
97         unsigned long min_rate_3d;
98         ktime_t last_throughput_hint;
99         struct work_struct work;
100         struct delayed_work idle_timer;
101         unsigned int scale;
102         unsigned int p_use_throughput_hint;
103         unsigned int p_throughput_lo_limit;
104         unsigned int p_throughput_hi_limit;
105         unsigned int p_scale_step;
106         unsigned int p_period;
107         unsigned int period;
108         unsigned int p_idle_min;
109         unsigned int idle_min;
110         unsigned int p_idle_max;
111         unsigned int idle_max;
112         unsigned int p_fast_response;
113         unsigned int fast_response;
114         unsigned int p_adjust;
115         unsigned int p_scale_emc;
116         unsigned int p_emc_dip;
117         unsigned int p_verbosity;
118         struct clk *clk_3d;
119         struct clk *clk_3d2;
120         struct clk *clk_3d_emc;
121 };
122
123 static struct scale3d_info_rec scale3d;
124
125 static void scale3d_clocks(unsigned long percent)
126 {
127         unsigned long hz, curr;
128
129         if (!tegra_is_clk_enabled(scale3d.clk_3d))
130                 return;
131
132         if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
133                 if (!tegra_is_clk_enabled(scale3d.clk_3d2))
134                         return;
135
136         curr = clk_get_rate(scale3d.clk_3d);
137         hz = percent * (curr / 100);
138
139         if (!(hz >= scale3d.max_rate_3d && curr == scale3d.max_rate_3d)) {
140                 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
141                         clk_set_rate(scale3d.clk_3d2, 0);
142                 clk_set_rate(scale3d.clk_3d, hz);
143
144                 if (scale3d.p_scale_emc) {
145                         long after = (long) clk_get_rate(scale3d.clk_3d);
146                         hz = after * scale3d.emc_slope + scale3d.emc_offset;
147                         if (scale3d.p_emc_dip)
148                                 hz -=
149                                         (scale3d.emc_dip_slope *
150                                         POW2(after / 1000 - scale3d.emc_xmid) +
151                                         scale3d.emc_dip_offset);
152                         clk_set_rate(scale3d.clk_3d_emc, hz);
153                 }
154         }
155 }
156
157 static void scale3d_clocks_handler(struct work_struct *work)
158 {
159         unsigned int scale;
160
161         mutex_lock(&scale3d.lock);
162         scale = scale3d.scale;
163         mutex_unlock(&scale3d.lock);
164
165         if (scale != 0)
166                 scale3d_clocks(scale);
167 }
168
169 void nvhost_scale3d_suspend(struct nvhost_device *dev)
170 {
171         if (!scale3d.enable)
172                 return;
173
174         cancel_work_sync(&scale3d.work);
175         cancel_delayed_work(&scale3d.idle_timer);
176 }
177
178 /* set 3d clocks to max */
179 static void reset_3d_clocks(void)
180 {
181         if (clk_get_rate(scale3d.clk_3d) != scale3d.max_rate_3d) {
182                 if (is_tegra_camera_on())
183                         clk_set_rate(scale3d.clk_3d, CAMERA_3D_CLK);
184                 else
185                         clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d);
186                 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) {
187                         if (is_tegra_camera_on())
188                                 clk_set_rate(scale3d.clk_3d2, CAMERA_3D_CLK);
189                         else
190                                 clk_set_rate(scale3d.clk_3d2,
191                                                         scale3d.max_rate_3d);
192                 }
193                 if (scale3d.p_scale_emc) {
194                         if (is_tegra_camera_on())
195                                 clk_set_rate(scale3d.clk_3d_emc,
196                                         CAMERA_3D_EMC_CLK);
197                         else
198                                 clk_set_rate(scale3d.clk_3d_emc,
199                                         clk_round_rate(scale3d.clk_3d_emc,
200                                                                 UINT_MAX));
201                 }
202         }
203 }
204
205 static int scale3d_is_enabled(void)
206 {
207         int enable;
208
209         if (!scale3d.enable)
210                 return 0;
211
212         mutex_lock(&scale3d.lock);
213         enable = scale3d.enable;
214         mutex_unlock(&scale3d.lock);
215
216         return enable;
217 }
218
219 static void scale3d_enable(int enable)
220 {
221         int disable = 0;
222
223         mutex_lock(&scale3d.lock);
224
225         if (enable) {
226                 if (scale3d.max_rate_3d != scale3d.min_rate_3d)
227                         scale3d.enable = 1;
228         } else {
229                 scale3d.enable = 0;
230                 disable = 1;
231         }
232
233         mutex_unlock(&scale3d.lock);
234
235         if (disable)
236                 reset_3d_clocks();
237 }
238
239 static void reset_scaling_counters(ktime_t time)
240 {
241         scale3d.idle_total = 0;
242         scale3d.idle_short_term_total = 0;
243         scale3d.last_idle = time;
244         scale3d.last_short_term_idle = time;
245         scale3d.idle_frame = time;
246 }
247
248 /* scaling_adjust - use scale up / scale down hint counts to adjust scaling
249  * parameters.
250  *
251  * hint_ratio is 100 x the ratio of scale up to scale down hints. Three cases
252  * are distinguished:
253  *
254  * hint_ratio < HINT_RATIO_MIN - set parameters to maximize scaling effect
255  * hint_ratio > HINT_RATIO_MAX - set parameters to minimize scaling effect
256  * hint_ratio between limits - scale parameters linearly
257  *
258  * the parameters adjusted are
259  *
260  * * fast_response time
261  * * period - time for scaling down estimate
262  * * idle_min percentage
263  * * idle_max percentage
264  */
265 #define SCALING_ADJUST_PERIOD 1000000
266 #define HINT_RATIO_MAX 400
267 #define HINT_RATIO_MIN 100
268 #define HINT_RATIO_MID ((HINT_RATIO_MAX + HINT_RATIO_MIN) / 2)
269 #define HINT_RATIO_DIFF (HINT_RATIO_MAX - HINT_RATIO_MIN)
270
271 static void scaling_adjust(ktime_t time)
272 {
273         long hint_ratio;
274         long fast_response_adjustment;
275         long period_adjustment;
276         int idle_min_adjustment;
277         int idle_max_adjustment;
278         unsigned long dt;
279
280         dt = (unsigned long) ktime_us_delta(time, scale3d.last_tweak);
281         if (dt < SCALING_ADJUST_PERIOD)
282                 return;
283
284         hint_ratio = (100 * (scale3d.fast_up_count + 1)) /
285                                  (scale3d.slow_down_count + 1);
286
287         if (hint_ratio > HINT_RATIO_MAX) {
288                 fast_response_adjustment = -((int) scale3d.p_fast_response) / 4;
289                 period_adjustment = scale3d.p_period / 2;
290                 idle_min_adjustment = scale3d.p_idle_min;
291                 idle_max_adjustment = scale3d.p_idle_max;
292         } else if (hint_ratio < HINT_RATIO_MIN) {
293                 fast_response_adjustment = scale3d.p_fast_response / 2;
294                 period_adjustment = -((int) scale3d.p_period) / 4;
295                 idle_min_adjustment = -((int) scale3d.p_idle_min) / 2;
296                 idle_max_adjustment = -((int) scale3d.p_idle_max) / 2;
297         } else {
298                 int diff;
299                 int factor;
300
301                 diff = HINT_RATIO_MID - hint_ratio;
302                 if (diff < 0)
303                         factor = -diff * 2;
304                 else {
305                         factor = -diff;
306                         diff *= 2;
307                 }
308
309                 fast_response_adjustment = diff *
310                         (scale3d.p_fast_response / (HINT_RATIO_DIFF * 2));
311                 period_adjustment =
312                         diff * (scale3d.p_period / HINT_RATIO_DIFF);
313                 idle_min_adjustment =
314                         (factor * (int) scale3d.p_idle_min) / HINT_RATIO_DIFF;
315                 idle_max_adjustment =
316                         (factor * (int) scale3d.p_idle_max) / HINT_RATIO_DIFF;
317         }
318
319         scale3d.fast_response =
320                 scale3d.p_fast_response + fast_response_adjustment;
321         scale3d.period = scale3d.p_period + period_adjustment;
322                 scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment;
323         scale3d.idle_max = scale3d.p_idle_max + idle_max_adjustment;
324
325         if (scale3d.p_verbosity >= 10)
326                 pr_info("scale3d stats: + %d - %d (/ %d) f %u p %u min %u max %u\n",
327                         scale3d.fast_up_count, scale3d.slow_down_count,
328                         scale3d.fast_responses, scale3d.fast_response,
329                         scale3d.period, scale3d.idle_min, scale3d.idle_max);
330
331         scale3d.fast_up_count = 0;
332         scale3d.slow_down_count = 0;
333         scale3d.fast_responses = 0;
334         scale3d.last_down = time;
335         scale3d.last_tweak = time;
336 }
337
338 #undef SCALING_ADJUST_PERIOD
339 #undef HINT_RATIO_MAX
340 #undef HINT_RATIO_MIN
341 #undef HINT_RATIO_MID
342 #undef HINT_RATIO_DIFF
343
344 static void scaling_state_check(ktime_t time)
345 {
346         unsigned long dt;
347
348         /* adjustment: set scale parameters (fast_response, period) +/- 25%
349          * based on ratio of scale up to scale down hints
350          */
351         if (scale3d.p_adjust)
352                 scaling_adjust(time);
353         else {
354                 scale3d.fast_response = scale3d.p_fast_response;
355                 scale3d.period = scale3d.p_period;
356                 scale3d.idle_min = scale3d.p_idle_min;
357                 scale3d.idle_max = scale3d.p_idle_max;
358         }
359
360         /* check for load peaks */
361         dt = (unsigned long) ktime_us_delta(time, scale3d.fast_frame);
362         if (dt > scale3d.fast_response) {
363                 unsigned long idleness =
364                         (scale3d.idle_short_term_total * 100) / dt;
365                 scale3d.fast_responses++;
366                 scale3d.fast_frame = time;
367                 /* if too busy, scale up */
368                 if (idleness < scale3d.idle_min) {
369                         scale3d.is_scaled = 0;
370                         scale3d.fast_up_count++;
371                         if (scale3d.p_verbosity >= 5)
372                                 pr_info("scale3d: %ld%% busy\n",
373                                         100 - idleness);
374
375                         reset_3d_clocks();
376                         reset_scaling_counters(time);
377                         return;
378                 }
379                 scale3d.idle_short_term_total = 0;
380                 scale3d.last_short_term_idle = time;
381         }
382
383         dt = (unsigned long) ktime_us_delta(time, scale3d.idle_frame);
384         if (dt > scale3d.period) {
385                 unsigned long idleness = (scale3d.idle_total * 100) / dt;
386
387                 if (scale3d.p_verbosity >= 5)
388                         pr_info("scale3d: idle %lu, ~%lu%%\n",
389                                 scale3d.idle_total, idleness);
390
391                 if (idleness > scale3d.idle_max) {
392                         if (!scale3d.is_scaled) {
393                                 scale3d.is_scaled = 1;
394                                 scale3d.last_down = time;
395                         }
396                         scale3d.slow_down_count++;
397                         /* if idle time is high, clock down */
398                         scale3d.scale = 100 - (idleness - scale3d.idle_min);
399                         schedule_work(&scale3d.work);
400                 }
401
402                 reset_scaling_counters(time);
403         }
404 }
405
406 void nvhost_scale3d_notify_idle(struct nvhost_device *dev)
407 {
408         ktime_t t;
409         unsigned long dt;
410
411         if (!scale3d.enable)
412                 return;
413
414         /* if throughput hint enabled, and last hint is recent enough, return */
415         if (scale3d.p_use_throughput_hint) {
416                 t = ktime_get();
417                 if (ktime_us_delta(t, scale3d.last_throughput_hint) < 1000000)
418                         return;
419         }
420
421         mutex_lock(&scale3d.lock);
422
423         t = ktime_get();
424
425         if (scale3d.is_idle) {
426                 dt = ktime_us_delta(t, scale3d.last_idle);
427                 scale3d.idle_total += dt;
428                 dt = ktime_us_delta(t, scale3d.last_short_term_idle);
429                 scale3d.idle_short_term_total += dt;
430         } else
431                 scale3d.is_idle = 1;
432
433         scale3d.last_idle = t;
434         scale3d.last_short_term_idle = t;
435
436         scaling_state_check(scale3d.last_idle);
437
438         /* delay idle_max % of 2 * fast_response time (given in microseconds) */
439         schedule_delayed_work(&scale3d.idle_timer,
440                 msecs_to_jiffies((scale3d.idle_max * scale3d.fast_response)
441                         / 50000));
442
443         mutex_unlock(&scale3d.lock);
444 }
445
446 void nvhost_scale3d_notify_busy(struct nvhost_device *dev)
447 {
448         unsigned long idle;
449         unsigned long short_term_idle;
450         ktime_t t;
451
452         if (!scale3d.enable)
453                 return;
454
455         /* if throughput hint enabled, and last hint is recent enough, return */
456         if (scale3d.p_use_throughput_hint) {
457                 t = ktime_get();
458                 if (ktime_us_delta(t, scale3d.last_throughput_hint) < 1000000)
459                         return;
460         }
461
462         mutex_lock(&scale3d.lock);
463
464         cancel_delayed_work(&scale3d.idle_timer);
465
466         t = ktime_get();
467
468         if (scale3d.is_idle) {
469                 idle = (unsigned long)
470                         ktime_us_delta(t, scale3d.last_idle);
471                 scale3d.idle_total += idle;
472                 short_term_idle =
473                         ktime_us_delta(t, scale3d.last_short_term_idle);
474                 scale3d.idle_short_term_total += short_term_idle;
475                 scale3d.is_idle = 0;
476         }
477
478         scaling_state_check(t);
479
480         mutex_unlock(&scale3d.lock);
481 }
482
483 static void do_scale(int diff)
484 {
485         unsigned long hz, curr;
486
487         if (!tegra_is_clk_enabled(scale3d.clk_3d))
488                 return;
489
490         if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
491                 if (!tegra_is_clk_enabled(scale3d.clk_3d2))
492                         return;
493
494         curr = clk_get_rate(scale3d.clk_3d);
495         hz = curr + diff;
496
497         if (hz < scale3d.min_rate_3d)
498                 hz = scale3d.min_rate_3d;
499
500         if (hz > scale3d.max_rate_3d)
501                 hz = scale3d.max_rate_3d;
502
503         if (hz == curr) return;
504
505         if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
506                 clk_set_rate(scale3d.clk_3d2, 0);
507         clk_set_rate(scale3d.clk_3d, hz);
508
509         if (scale3d.p_scale_emc) {
510                 long after = (long) clk_get_rate(scale3d.clk_3d);
511                 hz = after * scale3d.emc_slope + scale3d.emc_offset;
512                 if (scale3d.p_emc_dip)
513                         hz -=
514                                 (scale3d.emc_dip_slope *
515                                 POW2(after / 1000 - scale3d.emc_xmid) +
516                                 scale3d.emc_dip_offset);
517                 clk_set_rate(scale3d.clk_3d_emc, hz);
518         }
519 }
520
521 #define scale_up() do_scale(scale3d.p_scale_step)
522 #define scale_down() do_scale(-scale3d.p_scale_step)
523
524 void nvhost_scale3d_set_throughput_hint(int hint)
525 {
526         if (!scale3d.enable)
527                 return;
528
529         if (!scale3d.p_use_throughput_hint)
530                 return;
531
532         scale3d.last_throughput_hint = ktime_get();
533
534         if (scale3d.p_use_throughput_hint) {
535                 if (hint >= scale3d.p_throughput_hi_limit)
536                         scale_down();
537                 else if (hint <= scale3d.p_throughput_lo_limit)
538                         scale_up();
539         }
540 }
541 EXPORT_SYMBOL(nvhost_scale3d_set_throughput_hint);
542
543 static void scale3d_idle_handler(struct work_struct *work)
544 {
545         int notify_idle = 0;
546
547         if (!scale3d.enable)
548                 return;
549
550         mutex_lock(&scale3d.lock);
551
552         if (scale3d.is_idle && tegra_is_clk_enabled(scale3d.clk_3d)) {
553                 unsigned long curr = clk_get_rate(scale3d.clk_3d);
554                 if (curr > scale3d.min_rate_3d)
555                         notify_idle = 1;
556         }
557
558         mutex_unlock(&scale3d.lock);
559
560         if (notify_idle)
561                 nvhost_scale3d_notify_idle(NULL);
562 }
563
564 void nvhost_scale3d_reset()
565 {
566         ktime_t t;
567
568         if (!scale3d.enable)
569                 return;
570
571         t = ktime_get();
572         mutex_lock(&scale3d.lock);
573         reset_scaling_counters(t);
574         mutex_unlock(&scale3d.lock);
575 }
576
577 /*
578  * debugfs parameters to control 3d clock scaling
579  */
580
581 void nvhost_scale3d_debug_init(struct dentry *de)
582 {
583         struct dentry *d, *f;
584
585         d = debugfs_create_dir("scaling", de);
586         if (!d) {
587                 pr_err("scale3d: can\'t create debugfs directory\n");
588                 return;
589         }
590
591 #define CREATE_SCALE3D_FILE(fname) \
592         do {\
593                 f = debugfs_create_u32(#fname, S_IRUGO | S_IWUSR, d,\
594                         &scale3d.p_##fname);\
595                 if (NULL == f) {\
596                         pr_err("scale3d: can\'t create file " #fname "\n");\
597                         return;\
598                 } \
599         } while (0)
600
601         CREATE_SCALE3D_FILE(fast_response);
602         CREATE_SCALE3D_FILE(idle_min);
603         CREATE_SCALE3D_FILE(idle_max);
604         CREATE_SCALE3D_FILE(period);
605         CREATE_SCALE3D_FILE(adjust);
606         CREATE_SCALE3D_FILE(scale_emc);
607         CREATE_SCALE3D_FILE(emc_dip);
608         CREATE_SCALE3D_FILE(use_throughput_hint);
609         CREATE_SCALE3D_FILE(throughput_hi_limit);
610         CREATE_SCALE3D_FILE(throughput_lo_limit);
611         CREATE_SCALE3D_FILE(scale_step);
612         CREATE_SCALE3D_FILE(verbosity);
613 #undef CREATE_SCALE3D_FILE
614 }
615
616 static ssize_t enable_3d_scaling_show(struct device *device,
617         struct device_attribute *attr, char *buf)
618 {
619         ssize_t res;
620
621         res = snprintf(buf, PAGE_SIZE, "%d\n", scale3d_is_enabled());
622
623         return res;
624 }
625
626 static ssize_t enable_3d_scaling_store(struct device *dev,
627         struct device_attribute *attr, const char *buf, size_t count)
628 {
629         unsigned long val = 0;
630
631         if (strict_strtoul(buf, 10, &val) < 0)
632                 return -EINVAL;
633
634         scale3d_enable(val);
635
636         return count;
637 }
638
639 static DEVICE_ATTR(enable_3d_scaling, S_IRUGO | S_IWUSR,
640         enable_3d_scaling_show, enable_3d_scaling_store);
641
642 void nvhost_scale3d_init(struct nvhost_device *d)
643 {
644         if (!scale3d.init) {
645                 int error;
646                 unsigned long max_emc, min_emc;
647                 long correction;
648                 mutex_init(&scale3d.lock);
649
650                 INIT_WORK(&scale3d.work, scale3d_clocks_handler);
651                 INIT_DELAYED_WORK(&scale3d.idle_timer, scale3d_idle_handler);
652
653                 scale3d.clk_3d = d->clk[0];
654                 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) {
655                         scale3d.clk_3d2 = d->clk[1];
656                         scale3d.clk_3d_emc = d->clk[2];
657                 } else
658                         scale3d.clk_3d_emc = d->clk[1];
659
660                 scale3d.max_rate_3d = clk_round_rate(scale3d.clk_3d, UINT_MAX);
661                 scale3d.min_rate_3d = clk_round_rate(scale3d.clk_3d, 0);
662
663                 if (scale3d.max_rate_3d == scale3d.min_rate_3d) {
664                         pr_warn("scale3d: 3d max rate = min rate (%lu), "
665                                 "disabling\n", scale3d.max_rate_3d);
666                         scale3d.enable = 0;
667                         return;
668                 }
669
670                 /* emc scaling:
671                  *
672                  * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
673                  *
674                  * Remc - 3d.emc rate
675                  * R3d  - 3d.cbus rate
676                  * Rm   - 3d.cbus 'middle' rate = (max + min)/2
677                  * S    - emc_slope
678                  * O    - emc_offset
679                  * Sd   - emc_dip_slope
680                  * Od   - emc_dip_offset
681                  *
682                  * this superposes a quadratic dip centered around the middle 3d
683                  * frequency over a linear correlation of 3d.emc to 3d clock
684                  * rates.
685                  *
686                  * S, O are chosen so that the maximum 3d rate produces the
687                  * maximum 3d.emc rate exactly, and the minimum 3d rate produces
688                  * at least the minimum 3d.emc rate.
689                  *
690                  * Sd and Od are chosen to produce the largest dip that will
691                  * keep 3d.emc frequencies monotonously decreasing with 3d
692                  * frequencies. To achieve this, the first derivative of Remc
693                  * with respect to R3d should be zero for the minimal 3d rate:
694                  *
695                  *   R'emc = S - 2 * Sd * (R3d - Rm)
696                  *   R'emc(R3d-min) = 0
697                  *   S = 2 * Sd * (R3d-min - Rm)
698                  *     = 2 * Sd * (R3d-min - R3d-max) / 2
699                  *   Sd = S / (R3d-min - R3d-max)
700                  *
701                  *   +---------------------------------------------------+
702                  *   | Sd = -(emc-max - emc-min) / (R3d-min - R3d-max)^2 |
703                  *   +---------------------------------------------------+
704                  *
705                  *   dip = Sd * (R3d - Rm)^2 + Od
706                  *
707                  * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
708                  *
709                  *   Sd * (R3d-min - Rm)^2 + Od = 0
710                  *   Od = -Sd * ((R3d-min - R3d-max) / 2)^2
711                  *      = -Sd * ((R3d-min - R3d-max)^2) / 4
712                  *
713                  *   +------------------------------+
714                  *   | Od = (emc-max - emc-min) / 4 |
715                  *   +------------------------------+
716                  */
717
718                 max_emc = clk_round_rate(scale3d.clk_3d_emc, UINT_MAX);
719                 min_emc = clk_round_rate(scale3d.clk_3d_emc, 0);
720
721                 scale3d.emc_slope = (max_emc - min_emc) /
722                          (scale3d.max_rate_3d - scale3d.min_rate_3d);
723                 scale3d.emc_offset = max_emc -
724                         scale3d.emc_slope * scale3d.max_rate_3d;
725                 /* guarantee max 3d rate maps to max emc rate */
726                 scale3d.emc_offset += max_emc -
727                         (scale3d.emc_slope * scale3d.max_rate_3d +
728                         scale3d.emc_offset);
729
730                 scale3d.emc_dip_offset = (max_emc - min_emc) / 4;
731                 scale3d.emc_dip_slope =
732                         -4 * (scale3d.emc_dip_offset /
733                         (POW2(scale3d.max_rate_3d - scale3d.min_rate_3d)));
734                 scale3d.emc_xmid =
735                         (scale3d.max_rate_3d + scale3d.min_rate_3d) / 2;
736                 correction =
737                         scale3d.emc_dip_offset +
738                                 scale3d.emc_dip_slope *
739                                 POW2(scale3d.max_rate_3d - scale3d.emc_xmid);
740                 scale3d.emc_dip_offset -= correction;
741
742                 /* set scaling parameter defaults */
743                 scale3d.enable = 1;
744                 scale3d.period = scale3d.p_period = 100000;
745                 scale3d.idle_min = scale3d.p_idle_min = 10;
746                 scale3d.idle_max = scale3d.p_idle_max = 15;
747                 scale3d.fast_response = scale3d.p_fast_response = 7000;
748                 scale3d.p_scale_emc = 1;
749                 scale3d.p_emc_dip = 1;
750                 scale3d.p_verbosity = 0;
751                 scale3d.p_adjust = 1;
752                 scale3d.p_use_throughput_hint = 1;
753                 scale3d.p_throughput_lo_limit = 95;
754                 scale3d.p_throughput_hi_limit = 100;
755                 scale3d.p_scale_step = 60000000;
756
757                 error = device_create_file(&d->dev,
758                                 &dev_attr_enable_3d_scaling);
759                 if (error)
760                         dev_err(&d->dev, "failed to create sysfs attributes");
761
762                 scale3d.init = 1;
763         }
764
765         nvhost_scale3d_reset();
766 }
767
768 void nvhost_scale3d_deinit(struct nvhost_device *dev)
769 {
770         device_remove_file(&dev->dev, &dev_attr_enable_3d_scaling);
771         scale3d.init = 0;
772 }