video: tegra: host: adding throughput hint in 3dfs
[linux-2.6.git] / drivers / video / tegra / host / gr3d / scale3d.c
1 /*
2  * drivers/video/tegra/host/t20/scale3d.c
3  *
4  * Tegra Graphics Host 3D clock scaling
5  *
6  * Copyright (c) 2010-2012, NVIDIA Corporation.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20
/*
 * 3d clock scaling
 *
 * nvhost_scale3d_notify_busy() is called upon submit,
 * nvhost_scale3d_notify_idle() is called when all outstanding submits are
 * completed. Idle times are measured over a fixed time period
 * (scale3d.p_period). If the 3d module idle time percentage goes over the
 * limit (set in scale3d.p_idle_max), 3d clocks are scaled down. If the
 * percentage goes under the minimum limit (set in scale3d.p_idle_min), 3d
 * clocks are scaled up. An additional test is made over the time frame given
 * in scale3d.p_fast_response for clocking up quickly in response to load
 * peaks.
 *
 * 3d.emc clock is scaled proportionately to 3d clock, with a quadratic-
 * bezier-like factor added to pull 3d.emc rate a bit lower.
 */
36
37 #include <linux/debugfs.h>
38 #include <linux/types.h>
39 #include <linux/clk.h>
40 #include <linux/export.h>
41 #include <mach/clk.h>
42 #include <mach/hardware.h>
43 #include "scale3d.h"
44 #include "dev.h"
45
/* forward declarations; both functions are defined further down in this file */
static int scale3d_is_enabled(void);
static void scale3d_enable(int enable);

/*
 * Square of x. The argument is expanded twice, so it must not have side
 * effects; the multiplication is carried out in the argument's own type.
 */
#define POW2(x) ((x) * (x))
50
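/*
 * debugfs parameters to control 3d clock scaling test
 *
 * period              - time period (microseconds) for clock rate evaluation
 * fast_response       - time period (microseconds) for evaluation of 'busy'
 *                       spikes
 * idle_min            - if less than [idle_min] percent idle over
 *                       [fast_response] microseconds, clock up.
 * idle_max            - if over [idle_max] percent idle over [period]
 *                       microseconds, clock down.
 * max_scale           - limits rate changes to no less than (100 - max_scale)%
 *                       or (100 + 2 * max_scale)% of current clock rate
 *                       (no debugfs file of this name is created by
 *                       nvhost_scale3d_debug_init())
 * adjust              - if nonzero, period, fast_response, idle_min and
 *                       idle_max are re-tuned at run time from the ratio of
 *                       clock-up to clock-down events
 * scale_emc           - if nonzero, the 3d.emc clock rate is updated whenever
 *                       the 3d clock rate is changed
 * emc_dip             - if nonzero, the quadratic dip is subtracted from the
 *                       computed 3d.emc rate
 * use_throughput_hint - if nonzero, nvhost_scale3d_set_throughput_hint()
 *                       drives scaling, and busy/idle notifications are
 *                       ignored for one second after each hint
 * throughput_hi_limit - hints at or above this value clock down by
 *                       [scale_step]
 * throughput_lo_limit - hints at or below this value clock up by [scale_step]
 * scale_step          - amount added to / subtracted from the current 3d
 *                       clock rate per throughput hint
 * verbosity           - set to 5 or above for debug printouts, 10 or above
 *                       for parameter adjustment statistics
 */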
64
65 struct scale3d_info_rec {
66         struct mutex lock; /* lock for timestamps etc */
67         int enable;
68         int init;
69         ktime_t idle_frame;
70         ktime_t fast_frame;
71         ktime_t last_idle;
72         ktime_t last_short_term_idle;
73         int is_idle;
74         ktime_t last_tweak;
75         ktime_t last_down;
76         int fast_up_count;
77         int slow_down_count;
78         int is_scaled;
79         int fast_responses;
80         unsigned long idle_total;
81         unsigned long idle_short_term_total;
82         unsigned long max_rate_3d;
83         long emc_slope;
84         long emc_offset;
85         long emc_dip_slope;
86         long emc_dip_offset;
87         long emc_xmid;
88         unsigned long min_rate_3d;
89         ktime_t last_throughput_hint;
90         struct work_struct work;
91         struct delayed_work idle_timer;
92         unsigned int scale;
93         unsigned int p_use_throughput_hint;
94         unsigned int p_throughput_lo_limit;
95         unsigned int p_throughput_hi_limit;
96         unsigned int p_scale_step;
97         unsigned int p_period;
98         unsigned int period;
99         unsigned int p_idle_min;
100         unsigned int idle_min;
101         unsigned int p_idle_max;
102         unsigned int idle_max;
103         unsigned int p_fast_response;
104         unsigned int fast_response;
105         unsigned int p_adjust;
106         unsigned int p_scale_emc;
107         unsigned int p_emc_dip;
108         unsigned int p_verbosity;
109         struct clk *clk_3d;
110         struct clk *clk_3d2;
111         struct clk *clk_3d_emc;
112 };
113
114 static struct scale3d_info_rec scale3d;
115
116 static void scale3d_clocks(unsigned long percent)
117 {
118         unsigned long hz, curr;
119
120         if (!tegra_is_clk_enabled(scale3d.clk_3d))
121                 return;
122
123         if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
124                 if (!tegra_is_clk_enabled(scale3d.clk_3d2))
125                         return;
126
127         curr = clk_get_rate(scale3d.clk_3d);
128         hz = percent * (curr / 100);
129
130         if (!(hz >= scale3d.max_rate_3d && curr == scale3d.max_rate_3d)) {
131                 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
132                         clk_set_rate(scale3d.clk_3d2, 0);
133                 clk_set_rate(scale3d.clk_3d, hz);
134
135                 if (scale3d.p_scale_emc) {
136                         long after = (long) clk_get_rate(scale3d.clk_3d);
137                         hz = after * scale3d.emc_slope + scale3d.emc_offset;
138                         if (scale3d.p_emc_dip)
139                                 hz -=
140                                         (scale3d.emc_dip_slope *
141                                         POW2(after / 1000 - scale3d.emc_xmid) +
142                                         scale3d.emc_dip_offset);
143                         clk_set_rate(scale3d.clk_3d_emc, hz);
144                 }
145         }
146 }
147
148 static void scale3d_clocks_handler(struct work_struct *work)
149 {
150         unsigned int scale;
151
152         mutex_lock(&scale3d.lock);
153         scale = scale3d.scale;
154         mutex_unlock(&scale3d.lock);
155
156         if (scale != 0)
157                 scale3d_clocks(scale);
158 }
159
160 void nvhost_scale3d_suspend(struct nvhost_device *dev)
161 {
162         if (!scale3d.enable)
163                 return;
164
165         cancel_work_sync(&scale3d.work);
166         cancel_delayed_work(&scale3d.idle_timer);
167 }
168
169 /* set 3d clocks to max */
170 static void reset_3d_clocks(void)
171 {
172         if (clk_get_rate(scale3d.clk_3d) != scale3d.max_rate_3d) {
173                 clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d);
174                 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
175                         clk_set_rate(scale3d.clk_3d2, scale3d.max_rate_3d);
176                 if (scale3d.p_scale_emc)
177                         clk_set_rate(scale3d.clk_3d_emc,
178                                 clk_round_rate(scale3d.clk_3d_emc, UINT_MAX));
179         }
180 }
181
182 static int scale3d_is_enabled(void)
183 {
184         int enable;
185
186         if (!scale3d.enable)
187                 return 0;
188
189         mutex_lock(&scale3d.lock);
190         enable = scale3d.enable;
191         mutex_unlock(&scale3d.lock);
192
193         return enable;
194 }
195
196 static void scale3d_enable(int enable)
197 {
198         int disable = 0;
199
200         mutex_lock(&scale3d.lock);
201
202         if (enable) {
203                 if (scale3d.max_rate_3d != scale3d.min_rate_3d)
204                         scale3d.enable = 1;
205         } else {
206                 scale3d.enable = 0;
207                 disable = 1;
208         }
209
210         mutex_unlock(&scale3d.lock);
211
212         if (disable)
213                 reset_3d_clocks();
214 }
215
216 static void reset_scaling_counters(ktime_t time)
217 {
218         scale3d.idle_total = 0;
219         scale3d.idle_short_term_total = 0;
220         scale3d.last_idle = time;
221         scale3d.last_short_term_idle = time;
222         scale3d.idle_frame = time;
223 }
224
225 /* scaling_adjust - use scale up / scale down hint counts to adjust scaling
226  * parameters.
227  *
228  * hint_ratio is 100 x the ratio of scale up to scale down hints. Three cases
229  * are distinguished:
230  *
231  * hint_ratio < HINT_RATIO_MIN - set parameters to maximize scaling effect
232  * hint_ratio > HINT_RATIO_MAX - set parameters to minimize scaling effect
233  * hint_ratio between limits - scale parameters linearly
234  *
235  * the parameters adjusted are
236  *
237  * * fast_response time
238  * * period - time for scaling down estimate
239  * * idle_min percentage
240  * * idle_max percentage
241  */
242 #define SCALING_ADJUST_PERIOD 1000000
243 #define HINT_RATIO_MAX 400
244 #define HINT_RATIO_MIN 100
245 #define HINT_RATIO_MID ((HINT_RATIO_MAX + HINT_RATIO_MIN) / 2)
246 #define HINT_RATIO_DIFF (HINT_RATIO_MAX - HINT_RATIO_MIN)
247
248 static void scaling_adjust(ktime_t time)
249 {
250         long hint_ratio;
251         long fast_response_adjustment;
252         long period_adjustment;
253         int idle_min_adjustment;
254         int idle_max_adjustment;
255         unsigned long dt;
256
257         dt = (unsigned long) ktime_us_delta(time, scale3d.last_tweak);
258         if (dt < SCALING_ADJUST_PERIOD)
259                 return;
260
261         hint_ratio = (100 * (scale3d.fast_up_count + 1)) /
262                                  (scale3d.slow_down_count + 1);
263
264         if (hint_ratio > HINT_RATIO_MAX) {
265                 fast_response_adjustment = -((int) scale3d.p_fast_response) / 4;
266                 period_adjustment = scale3d.p_period / 2;
267                 idle_min_adjustment = scale3d.p_idle_min;
268                 idle_max_adjustment = scale3d.p_idle_max;
269         } else if (hint_ratio < HINT_RATIO_MIN) {
270                 fast_response_adjustment = scale3d.p_fast_response / 2;
271                 period_adjustment = -((int) scale3d.p_period) / 4;
272                 idle_min_adjustment = -((int) scale3d.p_idle_min) / 2;
273                 idle_max_adjustment = -((int) scale3d.p_idle_max) / 2;
274         } else {
275                 int diff;
276                 int factor;
277
278                 diff = HINT_RATIO_MID - hint_ratio;
279                 if (diff < 0)
280                         factor = -diff * 2;
281                 else {
282                         factor = -diff;
283                         diff *= 2;
284                 }
285
286                 fast_response_adjustment = diff *
287                         (scale3d.p_fast_response / (HINT_RATIO_DIFF * 2));
288                 period_adjustment =
289                         diff * (scale3d.p_period / HINT_RATIO_DIFF);
290                 idle_min_adjustment =
291                         (factor * (int) scale3d.p_idle_min) / HINT_RATIO_DIFF;
292                 idle_max_adjustment =
293                         (factor * (int) scale3d.p_idle_max) / HINT_RATIO_DIFF;
294         }
295
296         scale3d.fast_response =
297                 scale3d.p_fast_response + fast_response_adjustment;
298         scale3d.period = scale3d.p_period + period_adjustment;
299                 scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment;
300         scale3d.idle_max = scale3d.p_idle_max + idle_max_adjustment;
301
302         if (scale3d.p_verbosity >= 10)
303                 pr_info("scale3d stats: + %d - %d (/ %d) f %u p %u min %u max %u\n",
304                         scale3d.fast_up_count, scale3d.slow_down_count,
305                         scale3d.fast_responses, scale3d.fast_response,
306                         scale3d.period, scale3d.idle_min, scale3d.idle_max);
307
308         scale3d.fast_up_count = 0;
309         scale3d.slow_down_count = 0;
310         scale3d.fast_responses = 0;
311         scale3d.last_down = time;
312         scale3d.last_tweak = time;
313 }
314
315 #undef SCALING_ADJUST_PERIOD
316 #undef HINT_RATIO_MAX
317 #undef HINT_RATIO_MIN
318 #undef HINT_RATIO_MID
319 #undef HINT_RATIO_DIFF
320
321 static void scaling_state_check(ktime_t time)
322 {
323         unsigned long dt;
324
325         /* adjustment: set scale parameters (fast_response, period) +/- 25%
326          * based on ratio of scale up to scale down hints
327          */
328         if (scale3d.p_adjust)
329                 scaling_adjust(time);
330         else {
331                 scale3d.fast_response = scale3d.p_fast_response;
332                 scale3d.period = scale3d.p_period;
333                 scale3d.idle_min = scale3d.p_idle_min;
334                 scale3d.idle_max = scale3d.p_idle_max;
335         }
336
337         /* check for load peaks */
338         dt = (unsigned long) ktime_us_delta(time, scale3d.fast_frame);
339         if (dt > scale3d.fast_response) {
340                 unsigned long idleness =
341                         (scale3d.idle_short_term_total * 100) / dt;
342                 scale3d.fast_responses++;
343                 scale3d.fast_frame = time;
344                 /* if too busy, scale up */
345                 if (idleness < scale3d.idle_min) {
346                         scale3d.is_scaled = 0;
347                         scale3d.fast_up_count++;
348                         if (scale3d.p_verbosity >= 5)
349                                 pr_info("scale3d: %ld%% busy\n",
350                                         100 - idleness);
351
352                         reset_3d_clocks();
353                         reset_scaling_counters(time);
354                         return;
355                 }
356                 scale3d.idle_short_term_total = 0;
357                 scale3d.last_short_term_idle = time;
358         }
359
360         dt = (unsigned long) ktime_us_delta(time, scale3d.idle_frame);
361         if (dt > scale3d.period) {
362                 unsigned long idleness = (scale3d.idle_total * 100) / dt;
363
364                 if (scale3d.p_verbosity >= 5)
365                         pr_info("scale3d: idle %lu, ~%lu%%\n",
366                                 scale3d.idle_total, idleness);
367
368                 if (idleness > scale3d.idle_max) {
369                         if (!scale3d.is_scaled) {
370                                 scale3d.is_scaled = 1;
371                                 scale3d.last_down = time;
372                         }
373                         scale3d.slow_down_count++;
374                         /* if idle time is high, clock down */
375                         scale3d.scale = 100 - (idleness - scale3d.idle_min);
376                         schedule_work(&scale3d.work);
377                 }
378
379                 reset_scaling_counters(time);
380         }
381 }
382
383 void nvhost_scale3d_notify_idle(struct nvhost_device *dev)
384 {
385         ktime_t t;
386         unsigned long dt;
387
388         if (!scale3d.enable)
389                 return;
390
391         /* if throughput hint enabled, and last hint is recent enough, return */
392         if (scale3d.p_use_throughput_hint) {
393                 t = ktime_get();
394                 if (ktime_us_delta(t, scale3d.last_throughput_hint) < 1000000)
395                         return;
396         }
397
398         mutex_lock(&scale3d.lock);
399
400         t = ktime_get();
401
402         if (scale3d.is_idle) {
403                 dt = ktime_us_delta(t, scale3d.last_idle);
404                 scale3d.idle_total += dt;
405                 dt = ktime_us_delta(t, scale3d.last_short_term_idle);
406                 scale3d.idle_short_term_total += dt;
407         } else
408                 scale3d.is_idle = 1;
409
410         scale3d.last_idle = t;
411         scale3d.last_short_term_idle = t;
412
413         scaling_state_check(scale3d.last_idle);
414
415         /* delay idle_max % of 2 * fast_response time (given in microseconds) */
416         schedule_delayed_work(&scale3d.idle_timer,
417                 msecs_to_jiffies((scale3d.idle_max * scale3d.fast_response)
418                         / 50000));
419
420         mutex_unlock(&scale3d.lock);
421 }
422
423 void nvhost_scale3d_notify_busy(struct nvhost_device *dev)
424 {
425         unsigned long idle;
426         unsigned long short_term_idle;
427         ktime_t t;
428
429         if (!scale3d.enable)
430                 return;
431
432         /* if throughput hint enabled, and last hint is recent enough, return */
433         if (scale3d.p_use_throughput_hint) {
434                 t = ktime_get();
435                 if (ktime_us_delta(t, scale3d.last_throughput_hint) < 1000000)
436                         return;
437         }
438
439         mutex_lock(&scale3d.lock);
440
441         cancel_delayed_work(&scale3d.idle_timer);
442
443         t = ktime_get();
444
445         if (scale3d.is_idle) {
446                 idle = (unsigned long)
447                         ktime_us_delta(t, scale3d.last_idle);
448                 scale3d.idle_total += idle;
449                 short_term_idle =
450                         ktime_us_delta(t, scale3d.last_short_term_idle);
451                 scale3d.idle_short_term_total += short_term_idle;
452                 scale3d.is_idle = 0;
453         }
454
455         scaling_state_check(t);
456
457         mutex_unlock(&scale3d.lock);
458 }
459
460 static void do_scale(int diff)
461 {
462         unsigned long hz, curr;
463
464         if (!tegra_is_clk_enabled(scale3d.clk_3d))
465                 return;
466
467         if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
468                 if (!tegra_is_clk_enabled(scale3d.clk_3d2))
469                         return;
470
471         curr = clk_get_rate(scale3d.clk_3d);
472         hz = curr + diff;
473
474         if (hz < scale3d.min_rate_3d)
475                 hz = scale3d.min_rate_3d;
476
477         if (hz > scale3d.max_rate_3d)
478                 hz = scale3d.max_rate_3d;
479
480         if (hz == curr) return;
481
482         if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
483                 clk_set_rate(scale3d.clk_3d2, 0);
484         clk_set_rate(scale3d.clk_3d, hz);
485
486         if (scale3d.p_scale_emc) {
487                 long after = (long) clk_get_rate(scale3d.clk_3d);
488                 hz = after * scale3d.emc_slope + scale3d.emc_offset;
489                 if (scale3d.p_emc_dip)
490                         hz -=
491                                 (scale3d.emc_dip_slope *
492                                 POW2(after / 1000 - scale3d.emc_xmid) +
493                                 scale3d.emc_dip_offset);
494                 clk_set_rate(scale3d.clk_3d_emc, hz);
495         }
496 }
497
498 #define scale_up() do_scale(scale3d.p_scale_step)
499 #define scale_down() do_scale(-scale3d.p_scale_step)
500
501 void nvhost_scale3d_set_throughput_hint(int hint)
502 {
503         if (!scale3d.enable)
504                 return;
505
506         if (!scale3d.p_use_throughput_hint)
507                 return;
508
509         scale3d.last_throughput_hint = ktime_get();
510
511         if (scale3d.p_use_throughput_hint) {
512                 if (hint >= scale3d.p_throughput_hi_limit)
513                         scale_down();
514                 else if (hint <= scale3d.p_throughput_lo_limit)
515                         scale_up();
516         }
517 }
518 EXPORT_SYMBOL(nvhost_scale3d_set_throughput_hint);
519
520 static void scale3d_idle_handler(struct work_struct *work)
521 {
522         int notify_idle = 0;
523
524         if (!scale3d.enable)
525                 return;
526
527         mutex_lock(&scale3d.lock);
528
529         if (scale3d.is_idle && tegra_is_clk_enabled(scale3d.clk_3d)) {
530                 unsigned long curr = clk_get_rate(scale3d.clk_3d);
531                 if (curr > scale3d.min_rate_3d)
532                         notify_idle = 1;
533         }
534
535         mutex_unlock(&scale3d.lock);
536
537         if (notify_idle)
538                 nvhost_scale3d_notify_idle(NULL);
539 }
540
541 void nvhost_scale3d_reset()
542 {
543         ktime_t t;
544
545         if (!scale3d.enable)
546                 return;
547
548         t = ktime_get();
549         mutex_lock(&scale3d.lock);
550         reset_scaling_counters(t);
551         mutex_unlock(&scale3d.lock);
552 }
553
554 /*
555  * debugfs parameters to control 3d clock scaling
556  */
557
558 void nvhost_scale3d_debug_init(struct dentry *de)
559 {
560         struct dentry *d, *f;
561
562         d = debugfs_create_dir("scaling", de);
563         if (!d) {
564                 pr_err("scale3d: can\'t create debugfs directory\n");
565                 return;
566         }
567
568 #define CREATE_SCALE3D_FILE(fname) \
569         do {\
570                 f = debugfs_create_u32(#fname, S_IRUGO | S_IWUSR, d,\
571                         &scale3d.p_##fname);\
572                 if (NULL == f) {\
573                         pr_err("scale3d: can\'t create file " #fname "\n");\
574                         return;\
575                 } \
576         } while (0)
577
578         CREATE_SCALE3D_FILE(fast_response);
579         CREATE_SCALE3D_FILE(idle_min);
580         CREATE_SCALE3D_FILE(idle_max);
581         CREATE_SCALE3D_FILE(period);
582         CREATE_SCALE3D_FILE(adjust);
583         CREATE_SCALE3D_FILE(scale_emc);
584         CREATE_SCALE3D_FILE(emc_dip);
585         CREATE_SCALE3D_FILE(use_throughput_hint);
586         CREATE_SCALE3D_FILE(throughput_hi_limit);
587         CREATE_SCALE3D_FILE(throughput_lo_limit);
588         CREATE_SCALE3D_FILE(scale_step);
589         CREATE_SCALE3D_FILE(verbosity);
590 #undef CREATE_SCALE3D_FILE
591 }
592
593 static ssize_t enable_3d_scaling_show(struct device *device,
594         struct device_attribute *attr, char *buf)
595 {
596         ssize_t res;
597
598         res = snprintf(buf, PAGE_SIZE, "%d\n", scale3d_is_enabled());
599
600         return res;
601 }
602
603 static ssize_t enable_3d_scaling_store(struct device *dev,
604         struct device_attribute *attr, const char *buf, size_t count)
605 {
606         unsigned long val = 0;
607
608         if (strict_strtoul(buf, 10, &val) < 0)
609                 return -EINVAL;
610
611         scale3d_enable(val);
612
613         return count;
614 }
615
616 static DEVICE_ATTR(enable_3d_scaling, S_IRUGO | S_IWUSR,
617         enable_3d_scaling_show, enable_3d_scaling_store);
618
619 void nvhost_scale3d_init(struct nvhost_device *d)
620 {
621         if (!scale3d.init) {
622                 int error;
623                 unsigned long max_emc, min_emc;
624                 long correction;
625                 mutex_init(&scale3d.lock);
626
627                 INIT_WORK(&scale3d.work, scale3d_clocks_handler);
628                 INIT_DELAYED_WORK(&scale3d.idle_timer, scale3d_idle_handler);
629
630                 scale3d.clk_3d = d->clk[0];
631                 if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) {
632                         scale3d.clk_3d2 = d->clk[1];
633                         scale3d.clk_3d_emc = d->clk[2];
634                 } else
635                         scale3d.clk_3d_emc = d->clk[1];
636
637                 scale3d.max_rate_3d = clk_round_rate(scale3d.clk_3d, UINT_MAX);
638                 scale3d.min_rate_3d = clk_round_rate(scale3d.clk_3d, 0);
639
640                 if (scale3d.max_rate_3d == scale3d.min_rate_3d) {
641                         pr_warn("scale3d: 3d max rate = min rate (%lu), "
642                                 "disabling\n", scale3d.max_rate_3d);
643                         scale3d.enable = 0;
644                         return;
645                 }
646
647                 /* emc scaling:
648                  *
649                  * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
650                  *
651                  * Remc - 3d.emc rate
652                  * R3d  - 3d.cbus rate
653                  * Rm   - 3d.cbus 'middle' rate = (max + min)/2
654                  * S    - emc_slope
655                  * O    - emc_offset
656                  * Sd   - emc_dip_slope
657                  * Od   - emc_dip_offset
658                  *
659                  * this superposes a quadratic dip centered around the middle 3d
660                  * frequency over a linear correlation of 3d.emc to 3d clock
661                  * rates.
662                  *
663                  * S, O are chosen so that the maximum 3d rate produces the
664                  * maximum 3d.emc rate exactly, and the minimum 3d rate produces
665                  * at least the minimum 3d.emc rate.
666                  *
667                  * Sd and Od are chosen to produce the largest dip that will
668                  * keep 3d.emc frequencies monotonously decreasing with 3d
669                  * frequencies. To achieve this, the first derivative of Remc
670                  * with respect to R3d should be zero for the minimal 3d rate:
671                  *
672                  *   R'emc = S - 2 * Sd * (R3d - Rm)
673                  *   R'emc(R3d-min) = 0
674                  *   S = 2 * Sd * (R3d-min - Rm)
675                  *     = 2 * Sd * (R3d-min - R3d-max) / 2
676                  *   Sd = S / (R3d-min - R3d-max)
677                  *
678                  *   +---------------------------------------------------+
679                  *   | Sd = -(emc-max - emc-min) / (R3d-min - R3d-max)^2 |
680                  *   +---------------------------------------------------+
681                  *
682                  *   dip = Sd * (R3d - Rm)^2 + Od
683                  *
684                  * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
685                  *
686                  *   Sd * (R3d-min - Rm)^2 + Od = 0
687                  *   Od = -Sd * ((R3d-min - R3d-max) / 2)^2
688                  *      = -Sd * ((R3d-min - R3d-max)^2) / 4
689                  *
690                  *   +------------------------------+
691                  *   | Od = (emc-max - emc-min) / 4 |
692                  *   +------------------------------+
693                  */
694
695                 max_emc = clk_round_rate(scale3d.clk_3d_emc, UINT_MAX);
696                 min_emc = clk_round_rate(scale3d.clk_3d_emc, 0);
697
698                 scale3d.emc_slope = (max_emc - min_emc) /
699                          (scale3d.max_rate_3d - scale3d.min_rate_3d);
700                 scale3d.emc_offset = max_emc -
701                         scale3d.emc_slope * scale3d.max_rate_3d;
702                 /* guarantee max 3d rate maps to max emc rate */
703                 scale3d.emc_offset += max_emc -
704                         (scale3d.emc_slope * scale3d.max_rate_3d +
705                         scale3d.emc_offset);
706
707                 scale3d.emc_dip_offset = (max_emc - min_emc) / 4;
708                 scale3d.emc_dip_slope =
709                         -4 * (scale3d.emc_dip_offset /
710                         (POW2(scale3d.max_rate_3d - scale3d.min_rate_3d)));
711                 scale3d.emc_xmid =
712                         (scale3d.max_rate_3d + scale3d.min_rate_3d) / 2;
713                 correction =
714                         scale3d.emc_dip_offset +
715                                 scale3d.emc_dip_slope *
716                                 POW2(scale3d.max_rate_3d - scale3d.emc_xmid);
717                 scale3d.emc_dip_offset -= correction;
718
719                 /* set scaling parameter defaults */
720                 scale3d.enable = 1;
721                 scale3d.period = scale3d.p_period = 100000;
722                 scale3d.idle_min = scale3d.p_idle_min = 10;
723                 scale3d.idle_max = scale3d.p_idle_max = 15;
724                 scale3d.fast_response = scale3d.p_fast_response = 7000;
725                 scale3d.p_scale_emc = 1;
726                 scale3d.p_emc_dip = 1;
727                 scale3d.p_verbosity = 0;
728                 scale3d.p_adjust = 1;
729                 scale3d.p_use_throughput_hint = 0;
730                 scale3d.p_throughput_lo_limit = 95;
731                 scale3d.p_throughput_hi_limit = 100;
732                 scale3d.p_scale_step = 60000000;
733
734                 error = device_create_file(&d->dev,
735                                 &dev_attr_enable_3d_scaling);
736                 if (error)
737                         dev_err(&d->dev, "failed to create sysfs attributes");
738
739                 scale3d.init = 1;
740         }
741
742         nvhost_scale3d_reset();
743 }
744
745 void nvhost_scale3d_deinit(struct nvhost_device *dev)
746 {
747         device_remove_file(&dev->dev, &dev_attr_enable_3d_scaling);
748         scale3d.init = 0;
749 }