/*
 * drivers/cpufreq/cpufreq_interactive.c
 *
 * Copyright (C) 2010 Google, Inc.
 * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Author: Mike Chan (mike@android.com)
 *
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include <asm/cputime.h>

struct cpufreq_interactive_cpuinfo {
        struct timer_list cpu_timer;
        int timer_idlecancel;
        u64 time_in_idle;
        u64 time_in_iowait;
        u64 idle_exit_time;
        u64 timer_run_time;
        int idling;
        u64 freq_change_time;
        u64 freq_change_time_in_idle;
        u64 freq_change_time_in_iowait;
        u64 last_high_freq_time;
        struct cpufreq_policy *policy;
        struct cpufreq_frequency_table *freq_table;
        unsigned int target_freq;
        int governor_enabled;
};

static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);

/* Realtime kthread handles frequency scaling up; a workqueue scales down */
static struct task_struct *up_task;
static struct workqueue_struct *down_wq;
static struct work_struct freq_scale_down_work;
static cpumask_t up_cpumask;
static spinlock_t up_cpumask_lock;
static cpumask_t down_cpumask;
static spinlock_t down_cpumask_lock;
static struct mutex set_speed_lock;
static struct mutex gov_state_lock;
static struct kobject *interactive_kobj;
static unsigned int active_count;

/* Go to max speed when CPU load at or above this value. */
#define DEFAULT_GO_MAXSPEED_LOAD 85
static unsigned long go_maxspeed_load;

/* Base of exponential rise to max speed; if 0, jump straight to maximum */
static unsigned long boost_factor;

/* Max frequency boost in kHz; if 0, no maximum is enforced */
static unsigned long max_boost;

/* Consider IO as busy */
static unsigned long io_is_busy;

/*
 * Target sustainable load relative to the current frequency.
 * If 0, the target is computed relative to the max speed.
 */
static unsigned long sustain_load;

/*
 * The minimum amount of time (in usec) to spend at a frequency before
 * we can ramp down.
 */
#define DEFAULT_MIN_SAMPLE_TIME 30000
static unsigned long min_sample_time;

/*
 * The sample rate (in usec) of the timer used to increase frequency.
 */
#define DEFAULT_TIMER_RATE 20000
static unsigned long timer_rate;

/*
 * The minimum delay (in usec) before the frequency is allowed to rise above
 * the normal maximum.  Since the CPU must remain at the high frequency for
 * at least MIN_SAMPLE_TIME once it rises, setting this delay to a multiple
 * of MIN_SAMPLE_TIME is the simplest way to enforce a duty cycle,
 * e.g. 5*MIN_SAMPLE_TIME gives roughly a 20% high-frequency duty cycle.
 */
#define DEFAULT_HIGH_FREQ_MIN_DELAY (5 * DEFAULT_MIN_SAMPLE_TIME)
static unsigned long high_freq_min_delay;
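
/*
 * Illustrative example with the defaults above: min_sample_time = 30000 us
 * and high_freq_min_delay = 150000 us.  A continuously loaded CPU may hold a
 * frequency above max_normal_freq for roughly min_sample_time (~30 ms), then
 * is capped at max_normal_freq until 150 ms have passed since it last went
 * high, i.e. about a 20% high-frequency duty cycle.
 */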

/*
 * The maximum frequency CPUs are allowed to run at normally;
 * 0 if disabled.
 */
#define DEFAULT_MAX_NORMAL_FREQ 0
static unsigned long max_normal_freq;


/* Defines to control mid-range frequencies */
#define DEFAULT_MID_RANGE_GO_MAXSPEED_LOAD 95

static unsigned long midrange_freq;
static unsigned long midrange_go_maxspeed_load;
static unsigned long midrange_max_boost;

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_interactive = {
        .name = "interactive",
        .governor = cpufreq_governor_interactive,
        .max_transition_latency = 10000000,
        .owner = THIS_MODULE,
};
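
/*
 * Note: max_transition_latency is expressed in nanoseconds, so the value
 * above (10000000) effectively limits this governor to cpufreq drivers whose
 * frequency transition latency is 10 ms or less.
 */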

static unsigned int cpufreq_interactive_get_target(
        int cpu_load, int load_since_change, struct cpufreq_policy *policy)
{
        unsigned int target_freq;
        unsigned int maxspeed_load = go_maxspeed_load;
        unsigned int mboost = max_boost;

        /*
         * Choose greater of short-term load (since last idle timer
         * started or timer function re-armed itself) or long-term load
         * (since last frequency change).
         */
        if (load_since_change > cpu_load)
                cpu_load = load_since_change;

        if (midrange_freq && policy->cur > midrange_freq) {
                maxspeed_load = midrange_go_maxspeed_load;
                mboost = midrange_max_boost;
        }

        if (cpu_load >= maxspeed_load) {
                if (!boost_factor)
                        return policy->max;

                target_freq = policy->cur * boost_factor;

                if (mboost && target_freq > policy->cur + mboost)
                        target_freq = policy->cur + mboost;
        } else {
                if (!sustain_load)
                        return policy->max * cpu_load / 100;

                target_freq = policy->cur * cpu_load / sustain_load;
        }

        target_freq = min(target_freq, policy->max);
        return target_freq;
}
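
/*
 * Worked example (illustrative values only): with go_maxspeed_load = 85,
 * boost_factor = 0 and sustain_load = 0, a load of 90% at any frequency
 * returns policy->max immediately, while a load of 60% returns
 * policy->max * 60 / 100, which the caller then rounds to a table frequency.
 * With boost_factor = 2 and max_boost = 300000 (kHz), the same 90% load at
 * policy->cur = 1000000 would request min(2000000, 1300000) = 1300000.
 */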

static inline cputime64_t get_cpu_iowait_time(
        unsigned int cpu, cputime64_t *wall)
{
        u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

        if (iowait_time == -1ULL)
                return 0;

        return iowait_time;
}

static void cpufreq_interactive_timer(unsigned long data)
{
        unsigned int delta_idle;
        unsigned int delta_iowait;
        unsigned int delta_time;
        int cpu_load;
        int load_since_change;
        u64 time_in_idle;
        u64 time_in_iowait;
        u64 idle_exit_time;
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
        u64 now_idle;
        u64 now_iowait;
        unsigned int new_freq;
        unsigned int index;
        unsigned long flags;

        smp_rmb();

        if (!pcpu->governor_enabled)
                goto exit;

        /*
         * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
         * this lets idle exit know the current idle time sample has
         * been processed, and idle exit can generate a new sample and
         * re-arm the timer.  This prevents a concurrent idle
         * exit on that CPU from writing a new set of info at the same time
         * the timer function runs (the timer function can't use that info
         * until more time passes).
         */
        time_in_idle = pcpu->time_in_idle;
        time_in_iowait = pcpu->time_in_iowait;
        idle_exit_time = pcpu->idle_exit_time;
        now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
        now_iowait = get_cpu_iowait_time(data, NULL);
        smp_wmb();

        /* If we raced with cancelling a timer, skip. */
        if (!idle_exit_time)
                goto exit;

        delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle);
        delta_iowait = (unsigned int) cputime64_sub(now_iowait, time_in_iowait);
        delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
                                                  idle_exit_time);

        /*
         * If timer ran less than 1ms after short-term sample started, retry.
         */
        if (delta_time < 1000)
                goto rearm;

        if (delta_idle > delta_time)
                cpu_load = 0;
        else {
                if (io_is_busy && delta_idle >= delta_iowait)
                        delta_idle -= delta_iowait;

                cpu_load = 100 * (delta_time - delta_idle) / delta_time;
        }
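
        /*
         * Illustrative example: with delta_time = 20000 us, of which
         * delta_idle = 5000 us were idle, cpu_load works out to
         * 100 * (20000 - 5000) / 20000 = 75.  When io_is_busy is set,
         * time spent waiting on I/O is first subtracted from the idle
         * time, so it counts as busy time instead.
         */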

        delta_idle = (unsigned int) cputime64_sub(now_idle,
                                                pcpu->freq_change_time_in_idle);
        delta_iowait = (unsigned int) cputime64_sub(now_iowait,
                                        pcpu->freq_change_time_in_iowait);
        delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
                                                  pcpu->freq_change_time);

        if ((delta_time == 0) || (delta_idle > delta_time))
                load_since_change = 0;
        else {
                if (io_is_busy && delta_idle >= delta_iowait)
                        delta_idle -= delta_iowait;

                load_since_change =
                        100 * (delta_time - delta_idle) / delta_time;
        }

        /*
         * Combine short-term load (since last idle timer started or timer
         * function re-armed itself) and long-term load (since last frequency
         * change) to determine new target frequency
         */
        new_freq = cpufreq_interactive_get_target(cpu_load, load_since_change,
                                                  pcpu->policy);

        if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
                                           new_freq, CPUFREQ_RELATION_H,
                                           &index)) {
                pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
                             (int) data);
                goto rearm;
        }

        new_freq = pcpu->freq_table[index].frequency;

        if (pcpu->target_freq == new_freq)
                goto rearm_if_notmax;

        /*
         * Do not scale down unless we have been at this frequency for the
         * minimum sample time.
         */
        if (new_freq < pcpu->target_freq) {
                if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time)
                    < min_sample_time)
                        goto rearm;
        }

        /*
         * Can only overclock if the delay has been satisfied.  Otherwise,
         * cap the request at the maximum allowed normal frequency.
         */
        if (max_normal_freq && (new_freq > max_normal_freq)) {
                if (cputime64_sub(pcpu->timer_run_time, pcpu->last_high_freq_time)
                                < high_freq_min_delay) {
                        new_freq = max_normal_freq;
                } else {
                        pcpu->last_high_freq_time = pcpu->timer_run_time;
                }
        }

        if (new_freq < pcpu->target_freq) {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&down_cpumask_lock, flags);
                cpumask_set_cpu(data, &down_cpumask);
                spin_unlock_irqrestore(&down_cpumask_lock, flags);
                queue_work(down_wq, &freq_scale_down_work);
        } else {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&up_cpumask_lock, flags);
                cpumask_set_cpu(data, &up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);
                wake_up_process(up_task);
        }

rearm_if_notmax:
        /*
         * Already set max speed and don't see a need to change that,
         * wait until next idle to re-evaluate, don't need timer.
         */
        if (pcpu->target_freq == pcpu->policy->max)
                goto exit;

rearm:
        if (!timer_pending(&pcpu->cpu_timer)) {
                /*
                 * If already at min: if that CPU is idle, don't set timer.
                 * Else cancel the timer if that CPU goes idle.  We don't
                 * need to re-evaluate speed until the next idle exit.
                 */
                if (pcpu->target_freq == pcpu->policy->min) {
                        smp_rmb();

                        if (pcpu->idling)
                                goto exit;

                        pcpu->timer_idlecancel = 1;
                }

                pcpu->time_in_idle = get_cpu_idle_time_us(
                        data, &pcpu->idle_exit_time);
                pcpu->time_in_iowait = get_cpu_iowait_time(
                        data, NULL);

                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }

exit:
        return;
}

static void cpufreq_interactive_idle_start(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());
        int pending;

        if (!pcpu->governor_enabled)
                return;

        pcpu->idling = 1;
        smp_wmb();
        pending = timer_pending(&pcpu->cpu_timer);

        if (pcpu->target_freq != pcpu->policy->min) {
#ifdef CONFIG_SMP
                /*
                 * Entering idle while not at lowest speed.  On some
                 * platforms this can hold the other CPU(s) at that speed
                 * even though the CPU is idle. Set a timer to re-evaluate
                 * speed so this idle CPU doesn't hold the other CPUs above
                 * min indefinitely.  This should probably be a quirk of
                 * the CPUFreq driver.
                 */
                if (!pending) {
                        pcpu->time_in_idle = get_cpu_idle_time_us(
                                smp_processor_id(), &pcpu->idle_exit_time);
                        pcpu->time_in_iowait = get_cpu_iowait_time(
                                smp_processor_id(), NULL);
                        pcpu->timer_idlecancel = 0;
                        mod_timer(&pcpu->cpu_timer,
                                  jiffies + usecs_to_jiffies(timer_rate));
                }
#endif
        } else {
                /*
                 * If at min speed and entering idle after load has
                 * already been evaluated, and a timer has been set just in
                 * case the CPU suddenly goes busy, cancel that timer.  The
                 * CPU didn't go busy; we'll recheck things upon idle exit.
                 */
                if (pending && pcpu->timer_idlecancel) {
                        del_timer(&pcpu->cpu_timer);
                        /*
                         * Ensure last timer run time is after current idle
                         * sample start time, so next idle exit will always
                         * start a new idle sampling period.
                         */
                        pcpu->idle_exit_time = 0;
                        pcpu->timer_idlecancel = 0;
                }
        }

}

static void cpufreq_interactive_idle_end(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());

        pcpu->idling = 0;
        smp_wmb();

        /*
         * Arm the timer for 1-2 ticks later if not already, and if the timer
         * function has already processed the previous load sampling
         * interval.  (If the timer is not pending but has not processed
         * the previous interval, it is probably racing with us on another
         * CPU.  Let it compute load based on the previous sample and then
         * re-arm the timer for another interval when it's done, rather
         * than updating the interval start time to be "now", which doesn't
         * give the timer function enough time to make a decision on this
         * run.)
         */
        if (timer_pending(&pcpu->cpu_timer) == 0 &&
            pcpu->timer_run_time >= pcpu->idle_exit_time &&
            pcpu->governor_enabled) {
                pcpu->time_in_idle =
                        get_cpu_idle_time_us(smp_processor_id(),
                                             &pcpu->idle_exit_time);
                pcpu->time_in_iowait =
                        get_cpu_iowait_time(smp_processor_id(),
                                                NULL);
                pcpu->timer_idlecancel = 0;
                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }

}

static int cpufreq_interactive_up_task(void *data)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                spin_lock_irqsave(&up_cpumask_lock, flags);

                if (cpumask_empty(&up_cpumask)) {
                        spin_unlock_irqrestore(&up_cpumask_lock, flags);
                        schedule();

                        if (kthread_should_stop())
                                break;

                        spin_lock_irqsave(&up_cpumask_lock, flags);
                }

                set_current_state(TASK_RUNNING);
                tmp_mask = up_cpumask;
                cpumask_clear(&up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);

                for_each_cpu(cpu, &tmp_mask) {
                        unsigned int j;
                        unsigned int max_freq = 0;

                        pcpu = &per_cpu(cpuinfo, cpu);
                        smp_rmb();

                        if (!pcpu->governor_enabled)
                                continue;

                        mutex_lock(&set_speed_lock);

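                        /*
                         * Scale to the highest target frequency requested by
                         * any CPU sharing this policy, so a busy CPU is not
                         * slowed down by its idler siblings.
                         */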
                        for_each_cpu(j, pcpu->policy->cpus) {
                                struct cpufreq_interactive_cpuinfo *pjcpu =
                                        &per_cpu(cpuinfo, j);

                                if (pjcpu->target_freq > max_freq)
                                        max_freq = pjcpu->target_freq;
                        }

                        __cpufreq_driver_target(pcpu->policy,
                                                max_freq,
                                                CPUFREQ_RELATION_H);
                        mutex_unlock(&set_speed_lock);

                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(cpu,
                                                     &pcpu->freq_change_time);
                        pcpu->freq_change_time_in_iowait =
                                get_cpu_iowait_time(cpu, NULL);
                }
        }

        return 0;
}

static void cpufreq_interactive_freq_down(struct work_struct *work)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        spin_lock_irqsave(&down_cpumask_lock, flags);
        tmp_mask = down_cpumask;
        cpumask_clear(&down_cpumask);
        spin_unlock_irqrestore(&down_cpumask_lock, flags);

        for_each_cpu(cpu, &tmp_mask) {
                unsigned int j;
                unsigned int max_freq = 0;

                pcpu = &per_cpu(cpuinfo, cpu);
                smp_rmb();

                if (!pcpu->governor_enabled)
                        continue;

                mutex_lock(&set_speed_lock);

                for_each_cpu(j, pcpu->policy->cpus) {
                        struct cpufreq_interactive_cpuinfo *pjcpu =
                                &per_cpu(cpuinfo, j);

                        if (pjcpu->target_freq > max_freq)
                                max_freq = pjcpu->target_freq;
                }

                __cpufreq_driver_target(pcpu->policy, max_freq,
                                        CPUFREQ_RELATION_H);

                mutex_unlock(&set_speed_lock);
                pcpu->freq_change_time_in_idle =
                        get_cpu_idle_time_us(cpu,
                                             &pcpu->freq_change_time);
                pcpu->freq_change_time_in_iowait =
                        get_cpu_iowait_time(cpu, NULL);
        }
}

#define DECL_CPUFREQ_INTERACTIVE_ATTR(name) \
static ssize_t show_##name(struct kobject *kobj, \
        struct attribute *attr, char *buf) \
{ \
        return sprintf(buf, "%lu\n", name); \
} \
\
static ssize_t store_##name(struct kobject *kobj, \
                struct attribute *attr, const char *buf, size_t count) \
{ \
        int ret; \
        unsigned long val; \
\
        ret = strict_strtoul(buf, 0, &val); \
        if (ret < 0) \
                return ret; \
        name = val; \
        return count; \
} \
\
static struct global_attr name##_attr = __ATTR(name, 0644, \
                show_##name, store_##name);

DECL_CPUFREQ_INTERACTIVE_ATTR(go_maxspeed_load)
DECL_CPUFREQ_INTERACTIVE_ATTR(midrange_freq)
DECL_CPUFREQ_INTERACTIVE_ATTR(midrange_go_maxspeed_load)
DECL_CPUFREQ_INTERACTIVE_ATTR(boost_factor)
DECL_CPUFREQ_INTERACTIVE_ATTR(io_is_busy)
DECL_CPUFREQ_INTERACTIVE_ATTR(max_boost)
DECL_CPUFREQ_INTERACTIVE_ATTR(midrange_max_boost)
DECL_CPUFREQ_INTERACTIVE_ATTR(sustain_load)
DECL_CPUFREQ_INTERACTIVE_ATTR(min_sample_time)
DECL_CPUFREQ_INTERACTIVE_ATTR(timer_rate)
DECL_CPUFREQ_INTERACTIVE_ATTR(high_freq_min_delay)
DECL_CPUFREQ_INTERACTIVE_ATTR(max_normal_freq)

#undef DECL_CPUFREQ_INTERACTIVE_ATTR

static struct attribute *interactive_attributes[] = {
        &go_maxspeed_load_attr.attr,
        &midrange_freq_attr.attr,
        &midrange_go_maxspeed_load_attr.attr,
        &boost_factor_attr.attr,
        &max_boost_attr.attr,
        &midrange_max_boost_attr.attr,
        &io_is_busy_attr.attr,
        &sustain_load_attr.attr,
        &min_sample_time_attr.attr,
        &timer_rate_attr.attr,
        &high_freq_min_delay_attr.attr,
        &max_normal_freq_attr.attr,
        NULL,
};

static struct attribute_group interactive_attr_group = {
        .attrs = interactive_attributes,
        .name = "interactive",
};
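
/*
 * The group above is created under cpufreq_global_kobject, which normally
 * corresponds to /sys/devices/system/cpu/cpufreq, so the tunables appear as
 * e.g. /sys/devices/system/cpu/cpufreq/interactive/go_maxspeed_load and can
 * be adjusted at runtime by writing plain decimal values to those files.
 */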

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event)
{
        int rc;
        unsigned int j;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct cpufreq_frequency_table *freq_table;

        switch (event) {
        case CPUFREQ_GOV_START:
                if (!cpu_online(policy->cpu))
                        return -EINVAL;

                freq_table =
                        cpufreq_frequency_get_table(policy->cpu);

                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->policy = policy;
                        pcpu->target_freq = policy->cur;
                        pcpu->freq_table = freq_table;
                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(j,
                                             &pcpu->freq_change_time);
                        pcpu->time_in_idle = pcpu->freq_change_time_in_idle;
                        pcpu->idle_exit_time = pcpu->freq_change_time;
                        pcpu->freq_change_time_in_iowait =
                                get_cpu_iowait_time(j, NULL);
                        pcpu->time_in_iowait = pcpu->freq_change_time_in_iowait;
                        if (!pcpu->last_high_freq_time)
                                pcpu->last_high_freq_time = pcpu->freq_change_time;
                        pcpu->timer_idlecancel = 1;
                        pcpu->governor_enabled = 1;
                        smp_wmb();

                        if (!timer_pending(&pcpu->cpu_timer))
                                mod_timer(&pcpu->cpu_timer, jiffies + 2);
                }

                mutex_lock(&gov_state_lock);
                active_count++;
                /*
                 * Do not register the idle hook and create sysfs
                 * entries if we have already done so.
                 */
                if (active_count == 1) {
                        rc = sysfs_create_group(cpufreq_global_kobject,
                                        &interactive_attr_group);
                        interactive_kobj = kobject_create_and_add(
                                                "gov_interactive",
                                                cpufreq_global_kobject);
                        kobject_uevent(interactive_kobj, KOBJ_ADD);
                        if (rc) {
                                mutex_unlock(&gov_state_lock);
                                return rc;
                        }
                }
                mutex_unlock(&gov_state_lock);

                break;

        case CPUFREQ_GOV_STOP:
                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->governor_enabled = 0;
                        smp_wmb();
                        del_timer_sync(&pcpu->cpu_timer);

                        /*
                         * Reset idle exit time since we may cancel the timer
                         * before it can run after the last idle exit time,
                         * to avoid tripping the check in idle exit for a timer
                         * that is trying to run.
                         */
                        pcpu->idle_exit_time = 0;
                }

                flush_work(&freq_scale_down_work);
                mutex_lock(&gov_state_lock);

                active_count--;

                if (active_count == 0) {
                        sysfs_remove_group(cpufreq_global_kobject,
                                        &interactive_attr_group);
                        kobject_uevent(interactive_kobj, KOBJ_REMOVE);
                        kobject_put(interactive_kobj);
                }

                mutex_unlock(&gov_state_lock);

                break;

        case CPUFREQ_GOV_LIMITS:
                if (policy->max < policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->min, CPUFREQ_RELATION_L);
                break;
        }
        return 0;
}

static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
                                             unsigned long val,
                                             void *data)
{
        switch (val) {
        case IDLE_START:
                cpufreq_interactive_idle_start();
                break;
        case IDLE_END:
                cpufreq_interactive_idle_end();
                break;
        }

        return 0;
}

static struct notifier_block cpufreq_interactive_idle_nb = {
        .notifier_call = cpufreq_interactive_idle_notifier,
};

static int __init cpufreq_interactive_init(void)
{
        unsigned int i;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

        go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD;
        midrange_go_maxspeed_load = DEFAULT_MID_RANGE_GO_MAXSPEED_LOAD;
        min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
        timer_rate = DEFAULT_TIMER_RATE;
        high_freq_min_delay = DEFAULT_HIGH_FREQ_MIN_DELAY;
        max_normal_freq = DEFAULT_MAX_NORMAL_FREQ;

        /* Initialize per-cpu timers */
        for_each_possible_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);
                init_timer(&pcpu->cpu_timer);
                pcpu->cpu_timer.function = cpufreq_interactive_timer;
                pcpu->cpu_timer.data = i;
        }

        up_task = kthread_create(cpufreq_interactive_up_task, NULL,
                                 "kinteractiveup");
        if (IS_ERR(up_task))
                return PTR_ERR(up_task);

        sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
        get_task_struct(up_task);

        /*
         * No rescuer thread, bind to CPU queuing the work for possibly
         * warm cache (probably doesn't matter much).
         */
        down_wq = alloc_workqueue("kinteractive_down", 0, 1);

        if (!down_wq)
                goto err_freeuptask;

        INIT_WORK(&freq_scale_down_work,
                  cpufreq_interactive_freq_down);

        spin_lock_init(&up_cpumask_lock);
        spin_lock_init(&down_cpumask_lock);
        mutex_init(&set_speed_lock);
        mutex_init(&gov_state_lock);

        idle_notifier_register(&cpufreq_interactive_idle_nb);

        return cpufreq_register_governor(&cpufreq_gov_interactive);

err_freeuptask:
        put_task_struct(up_task);
        return -ENOMEM;
}
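
/*
 * When interactive is the default governor it is registered via fs_initcall()
 * so that it is already available when the platform's cpufreq driver probes;
 * otherwise normal module_init() ordering is sufficient.
 */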

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif

static void __exit cpufreq_interactive_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_interactive);
        kthread_stop(up_task);
        put_task_struct(up_task);
        destroy_workqueue(down_wq);
}

module_exit(cpufreq_interactive_exit);

MODULE_AUTHOR("Mike Chan <mike@android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
        "latency-sensitive workloads");
MODULE_LICENSE("GPL");