/*
 * drivers/cpufreq/cpufreq_interactive.c
 *
 * Copyright (C) 2010 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Author: Mike Chan (mike@android.com)
 *
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include <asm/cputime.h>

static atomic_t active_count = ATOMIC_INIT(0);

struct cpufreq_interactive_cpuinfo {
        struct timer_list cpu_timer;
        int timer_idlecancel;
        u64 time_in_idle;
        u64 time_in_iowait;
        u64 idle_exit_time;
        u64 timer_run_time;
        int idling;
        u64 freq_change_time;
        u64 freq_change_time_in_idle;
        u64 freq_change_time_in_iowait;
        struct cpufreq_policy *policy;
        struct cpufreq_frequency_table *freq_table;
        unsigned int target_freq;
        int governor_enabled;
};

static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);

/* Realtime thread handles frequency scaling up; workqueue handles scaling down */
static struct task_struct *up_task;
static struct workqueue_struct *down_wq;
static struct work_struct freq_scale_down_work;
static cpumask_t up_cpumask;
static spinlock_t up_cpumask_lock;
static cpumask_t down_cpumask;
static spinlock_t down_cpumask_lock;
static struct mutex set_speed_lock;

/* Go to max speed when CPU load at or above this value. */
#define DEFAULT_GO_MAXSPEED_LOAD 85
static unsigned long go_maxspeed_load;

/* Base of exponential raise to max speed; if 0 - jump to maximum */
static unsigned long boost_factor;

/* Max frequency boost in kHz; if 0 - no max is enforced */
static unsigned long max_boost;

/* Consider IO as busy */
static unsigned long io_is_busy;

/*
 * Targeted sustainable load relative to current frequency.
 * If 0, target is set relative to the max speed.
 */
static unsigned long sustain_load;

/*
 * The minimum amount of time to spend at a frequency before we can ramp down.
 */
#define DEFAULT_MIN_SAMPLE_TIME 30000
static unsigned long min_sample_time;

/*
 * The sample rate of the timer used to increase frequency
 */
#define DEFAULT_TIMER_RATE 20000
static unsigned long timer_rate;

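/*
 * The tunables above are exported through sysfs (see interactive_attr_group
 * below).  A minimal usage sketch, assuming the group appears under the
 * global cpufreq kobject at the usual /sys/devices/system/cpu/cpufreq/
 * location (the exact path may differ by platform); time values are in
 * microseconds:
 *
 *   echo 90    > /sys/devices/system/cpu/cpufreq/interactive/go_maxspeed_load
 *   echo 40000 > /sys/devices/system/cpu/cpufreq/interactive/min_sample_time
 *   echo 20000 > /sys/devices/system/cpu/cpufreq/interactive/timer_rate
 */
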
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_interactive = {
        .name = "interactive",
        .governor = cpufreq_governor_interactive,
        .max_transition_latency = 10000000,
        .owner = THIS_MODULE,
};

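/*
 * Worked example for cpufreq_interactive_get_target() below (numbers are
 * illustrative only): with go_maxspeed_load = 85, boost_factor = 0 and
 * sustain_load = 0, a measured load of 90% returns policy->max outright,
 * while a load of 50% returns policy->max * 50 / 100, i.e. half the
 * maximum frequency.
 */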
static unsigned int cpufreq_interactive_get_target(
        int cpu_load, int load_since_change, struct cpufreq_policy *policy)
{
        unsigned int target_freq;

        /*
         * Choose greater of short-term load (since last idle timer
         * started or timer function re-armed itself) or long-term load
         * (since last frequency change).
         */
        if (load_since_change > cpu_load)
                cpu_load = load_since_change;

        if (cpu_load >= go_maxspeed_load) {
                if (!boost_factor)
                        return policy->max;

                target_freq = policy->cur * boost_factor;

                if (max_boost && target_freq > policy->cur + max_boost)
                        target_freq = policy->cur + max_boost;
        } else {
                if (!sustain_load)
                        return policy->max * cpu_load / 100;

                target_freq = policy->cur * cpu_load / sustain_load;
        }

        target_freq = min(target_freq, policy->max);
        return target_freq;
}

static inline cputime64_t get_cpu_iowait_time(
        unsigned int cpu, cputime64_t *wall)
{
        u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

        if (iowait_time == -1ULL)
                return 0;

        return iowait_time;
}

static void cpufreq_interactive_timer(unsigned long data)
{
        unsigned int delta_idle;
        unsigned int delta_iowait;
        unsigned int delta_time;
        int cpu_load;
        int load_since_change;
        u64 time_in_idle;
        u64 time_in_iowait;
        u64 idle_exit_time;
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
        u64 now_idle;
        u64 now_iowait;
        unsigned int new_freq;
        unsigned int index;
        unsigned long flags;

        smp_rmb();

        if (!pcpu->governor_enabled)
                goto exit;

        /*
         * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
         * this lets idle exit know the current idle time sample has
         * been processed, and idle exit can generate a new sample and
         * re-arm the timer.  This prevents a concurrent idle
         * exit on that CPU from writing a new set of info at the same time
         * the timer function runs (the timer function can't use that info
         * until more time passes).
         */
        time_in_idle = pcpu->time_in_idle;
        time_in_iowait = pcpu->time_in_iowait;
        idle_exit_time = pcpu->idle_exit_time;
        now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
        now_iowait = get_cpu_iowait_time(data, NULL);
        smp_wmb();

        /* If we raced with cancelling a timer, skip. */
        if (!idle_exit_time)
                goto exit;

        delta_idle = (unsigned int)(now_idle - time_in_idle);
        delta_iowait = (unsigned int)(now_iowait - time_in_iowait);
        delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time);

        /*
         * If timer ran less than 1ms after short-term sample started, retry.
         */
        if (delta_time < 1000)
                goto rearm;

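        /*
         * Busy fraction over the sampling window:
         *   cpu_load = 100 * (delta_time - delta_idle) / delta_time
         * e.g. a 20 ms window with 5 ms idle gives a load of 75%.  When
         * io_is_busy is set, time spent in iowait is subtracted from the
         * idle time so that I/O-bound periods count as busy.
         */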
        if (delta_idle > delta_time)
                cpu_load = 0;
        else {
                if (io_is_busy && delta_idle >= delta_iowait)
                        delta_idle -= delta_iowait;

                cpu_load = 100 * (delta_time - delta_idle) / delta_time;
        }

        delta_idle = (unsigned int)(now_idle - pcpu->freq_change_time_in_idle);
        delta_iowait = (unsigned int)(now_iowait - pcpu->freq_change_time_in_iowait);
        delta_time = (unsigned int)(pcpu->timer_run_time - pcpu->freq_change_time);

        if ((delta_time == 0) || (delta_idle > delta_time))
                load_since_change = 0;
        else {
                if (io_is_busy && delta_idle >= delta_iowait)
                        delta_idle -= delta_iowait;

                load_since_change =
                        100 * (delta_time - delta_idle) / delta_time;
        }

        /*
         * Combine short-term load (since last idle timer started or timer
         * function re-armed itself) and long-term load (since last frequency
         * change) to determine new target frequency
         */
        new_freq = cpufreq_interactive_get_target(cpu_load, load_since_change,
                                                  pcpu->policy);

        if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
                                           new_freq, CPUFREQ_RELATION_H,
                                           &index)) {
                pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
                             (int) data);
                goto rearm;
        }

        new_freq = pcpu->freq_table[index].frequency;

        if (pcpu->target_freq == new_freq)
                goto rearm_if_notmax;

        /*
         * Do not scale down unless we have been at this frequency for the
         * minimum sample time.
         */
        if (new_freq < pcpu->target_freq) {
                if (pcpu->timer_run_time - pcpu->freq_change_time
                    < min_sample_time)
                        goto rearm;
        }

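        /*
         * Dispatch the speed change: increases are latency-critical, so they
         * are handed to the SCHED_FIFO "kinteractiveup" thread via
         * up_cpumask; decreases are not urgent and go through the down_wq
         * workqueue instead.
         */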
        if (new_freq < pcpu->target_freq) {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&down_cpumask_lock, flags);
                cpumask_set_cpu(data, &down_cpumask);
                spin_unlock_irqrestore(&down_cpumask_lock, flags);
                queue_work(down_wq, &freq_scale_down_work);
        } else {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&up_cpumask_lock, flags);
                cpumask_set_cpu(data, &up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);
                wake_up_process(up_task);
        }

rearm_if_notmax:
        /*
         * Already set max speed and don't see a need to change that,
         * wait until next idle to re-evaluate, don't need timer.
         */
        if (pcpu->target_freq == pcpu->policy->max)
                goto exit;

rearm:
        if (!timer_pending(&pcpu->cpu_timer)) {
                /*
                 * If already at min: if that CPU is idle, don't set timer.
                 * Else cancel the timer if that CPU goes idle.  We don't
                 * need to re-evaluate speed until the next idle exit.
                 */
                if (pcpu->target_freq == pcpu->policy->min) {
                        smp_rmb();

                        if (pcpu->idling)
                                goto exit;

                        pcpu->timer_idlecancel = 1;
                }

                pcpu->time_in_idle = get_cpu_idle_time_us(
                        data, &pcpu->idle_exit_time);
                pcpu->time_in_iowait = get_cpu_iowait_time(
                        data, NULL);

                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }

exit:
        return;
}

static void cpufreq_interactive_idle_start(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());
        int pending;

        if (!pcpu->governor_enabled)
                return;

        pcpu->idling = 1;
        smp_wmb();
        pending = timer_pending(&pcpu->cpu_timer);

        if (pcpu->target_freq != pcpu->policy->min) {
#ifdef CONFIG_SMP
                /*
                 * Entering idle while not at lowest speed.  On some
                 * platforms this can hold the other CPU(s) at that speed
                 * even though the CPU is idle. Set a timer to re-evaluate
                 * speed so this idle CPU doesn't hold the other CPUs above
                 * min indefinitely.  This should probably be a quirk of
                 * the CPUFreq driver.
                 */
                if (!pending) {
                        pcpu->time_in_idle = get_cpu_idle_time_us(
                                smp_processor_id(), &pcpu->idle_exit_time);
                        pcpu->time_in_iowait = get_cpu_iowait_time(
                                smp_processor_id(), NULL);
                        pcpu->timer_idlecancel = 0;
                        mod_timer(&pcpu->cpu_timer,
                                  jiffies + usecs_to_jiffies(timer_rate));
                }
#endif
        } else {
                /*
                 * If at min speed and entering idle after load has
                 * already been evaluated, and a timer has been set just in
                 * case the CPU suddenly goes busy, cancel that timer.  The
                 * CPU didn't go busy; we'll recheck things upon idle exit.
                 */
                if (pending && pcpu->timer_idlecancel) {
                        del_timer(&pcpu->cpu_timer);
                        /*
                         * Ensure last timer run time is after current idle
                         * sample start time, so next idle exit will always
                         * start a new idle sampling period.
                         */
                        pcpu->idle_exit_time = 0;
                        pcpu->timer_idlecancel = 0;
                }
        }
}

static void cpufreq_interactive_idle_end(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());

        pcpu->idling = 0;
        smp_wmb();

        /*
         * Arm the timer for 1-2 ticks later if not already, and if the timer
         * function has already processed the previous load sampling
         * interval.  (If the timer is not pending but has not processed
         * the previous interval, it is probably racing with us on another
         * CPU.  Let it compute load based on the previous sample and then
         * re-arm the timer for another interval when it's done, rather
         * than updating the interval start time to be "now", which doesn't
         * give the timer function enough time to make a decision on this
         * run.)
         */
        if (timer_pending(&pcpu->cpu_timer) == 0 &&
            pcpu->timer_run_time >= pcpu->idle_exit_time &&
            pcpu->governor_enabled) {
                pcpu->time_in_idle =
                        get_cpu_idle_time_us(smp_processor_id(),
                                             &pcpu->idle_exit_time);
                pcpu->time_in_iowait =
                        get_cpu_iowait_time(smp_processor_id(), NULL);
                pcpu->timer_idlecancel = 0;
                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }
}

static int cpufreq_interactive_up_task(void *data)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                spin_lock_irqsave(&up_cpumask_lock, flags);

                if (cpumask_empty(&up_cpumask)) {
                        spin_unlock_irqrestore(&up_cpumask_lock, flags);
                        schedule();

                        if (kthread_should_stop())
                                break;

                        spin_lock_irqsave(&up_cpumask_lock, flags);
                }

                set_current_state(TASK_RUNNING);
                tmp_mask = up_cpumask;
                cpumask_clear(&up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);

                for_each_cpu(cpu, &tmp_mask) {
                        unsigned int j;
                        unsigned int max_freq = 0;

                        pcpu = &per_cpu(cpuinfo, cpu);
                        smp_rmb();

                        if (!pcpu->governor_enabled)
                                continue;

                        mutex_lock(&set_speed_lock);

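                        /*
                         * All CPUs in this policy share a clock, so drive
                         * the policy at the highest target frequency
                         * requested by any of them.
                         */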
                        for_each_cpu(j, pcpu->policy->cpus) {
                                struct cpufreq_interactive_cpuinfo *pjcpu =
                                        &per_cpu(cpuinfo, j);

                                if (pjcpu->target_freq > max_freq)
                                        max_freq = pjcpu->target_freq;
                        }

                        __cpufreq_driver_target(pcpu->policy,
                                                max_freq,
                                                CPUFREQ_RELATION_H);
                        mutex_unlock(&set_speed_lock);

                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(cpu,
                                                     &pcpu->freq_change_time);
                        pcpu->freq_change_time_in_iowait =
                                get_cpu_iowait_time(cpu, NULL);
                }
        }

        return 0;
}

static void cpufreq_interactive_freq_down(struct work_struct *work)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        spin_lock_irqsave(&down_cpumask_lock, flags);
        tmp_mask = down_cpumask;
        cpumask_clear(&down_cpumask);
        spin_unlock_irqrestore(&down_cpumask_lock, flags);

        for_each_cpu(cpu, &tmp_mask) {
                unsigned int j;
                unsigned int max_freq = 0;

                pcpu = &per_cpu(cpuinfo, cpu);
                smp_rmb();

                if (!pcpu->governor_enabled)
                        continue;

                mutex_lock(&set_speed_lock);

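                /* As in the up path, follow the highest per-CPU target in the policy. */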
                for_each_cpu(j, pcpu->policy->cpus) {
                        struct cpufreq_interactive_cpuinfo *pjcpu =
                                &per_cpu(cpuinfo, j);

                        if (pjcpu->target_freq > max_freq)
                                max_freq = pjcpu->target_freq;
                }

                __cpufreq_driver_target(pcpu->policy, max_freq,
                                        CPUFREQ_RELATION_H);

                mutex_unlock(&set_speed_lock);
                pcpu->freq_change_time_in_idle =
                        get_cpu_idle_time_us(cpu,
                                             &pcpu->freq_change_time);
                pcpu->freq_change_time_in_iowait =
                        get_cpu_iowait_time(cpu, NULL);
        }
}

static ssize_t show_go_maxspeed_load(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", go_maxspeed_load);
}

static ssize_t store_go_maxspeed_load(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        go_maxspeed_load = val;
        return count;
}

static struct global_attr go_maxspeed_load_attr = __ATTR(go_maxspeed_load, 0644,
                show_go_maxspeed_load, store_go_maxspeed_load);

static ssize_t show_boost_factor(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", boost_factor);
}

static ssize_t store_boost_factor(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        boost_factor = val;
        return count;
}

static struct global_attr boost_factor_attr = __ATTR(boost_factor, 0644,
                show_boost_factor, store_boost_factor);

static ssize_t show_io_is_busy(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", io_is_busy);
}

static ssize_t store_io_is_busy(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        if (!strict_strtoul(buf, 0, &io_is_busy))
                return count;
        return -EINVAL;
}

static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644,
                show_io_is_busy, store_io_is_busy);

static ssize_t show_max_boost(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", max_boost);
}

static ssize_t store_max_boost(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        max_boost = val;
        return count;
}

static struct global_attr max_boost_attr = __ATTR(max_boost, 0644,
                show_max_boost, store_max_boost);

static ssize_t show_sustain_load(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", sustain_load);
}

static ssize_t store_sustain_load(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        sustain_load = val;
        return count;
}

static struct global_attr sustain_load_attr = __ATTR(sustain_load, 0644,
                show_sustain_load, store_sustain_load);

static ssize_t show_min_sample_time(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", min_sample_time);
}

static ssize_t store_min_sample_time(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        min_sample_time = val;
        return count;
}

static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
                show_min_sample_time, store_min_sample_time);

static ssize_t show_timer_rate(struct kobject *kobj,
                        struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", timer_rate);
}

static ssize_t store_timer_rate(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        timer_rate = val;
        return count;
}

static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
                show_timer_rate, store_timer_rate);

static struct attribute *interactive_attributes[] = {
        &go_maxspeed_load_attr.attr,
        &boost_factor_attr.attr,
        &max_boost_attr.attr,
        &io_is_busy_attr.attr,
        &sustain_load_attr.attr,
        &min_sample_time_attr.attr,
        &timer_rate_attr.attr,
        NULL,
};

static struct attribute_group interactive_attr_group = {
        .attrs = interactive_attributes,
        .name = "interactive",
};

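/*
 * Governor callback: CPUFREQ_GOV_START wires up per-CPU state and kicks the
 * sampling timers, CPUFREQ_GOV_STOP tears them down, and CPUFREQ_GOV_LIMITS
 * clamps the current frequency into the new policy range.
 */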
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event)
{
        int rc;
        unsigned int j;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct cpufreq_frequency_table *freq_table;

        switch (event) {
        case CPUFREQ_GOV_START:
                if (!cpu_online(policy->cpu))
                        return -EINVAL;

                freq_table =
                        cpufreq_frequency_get_table(policy->cpu);

                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->policy = policy;
                        pcpu->target_freq = policy->cur;
                        pcpu->freq_table = freq_table;
                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(j,
                                             &pcpu->freq_change_time);
                        pcpu->time_in_idle = pcpu->freq_change_time_in_idle;
                        pcpu->idle_exit_time = pcpu->freq_change_time;
                        pcpu->freq_change_time_in_iowait =
                                get_cpu_iowait_time(j, NULL);
                        pcpu->time_in_iowait = pcpu->freq_change_time_in_iowait;

                        pcpu->timer_idlecancel = 1;
                        pcpu->governor_enabled = 1;
                        smp_wmb();

                        if (!timer_pending(&pcpu->cpu_timer))
                                mod_timer(&pcpu->cpu_timer, jiffies + 2);
                }

                /*
                 * Do not register the idle hook and create sysfs
                 * entries if we have already done so.
                 */
                if (atomic_inc_return(&active_count) > 1)
                        return 0;

                rc = sysfs_create_group(cpufreq_global_kobject,
                                &interactive_attr_group);
                if (rc)
                        return rc;

                break;

        case CPUFREQ_GOV_STOP:
                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->governor_enabled = 0;
                        smp_wmb();
                        del_timer_sync(&pcpu->cpu_timer);

                        /*
                         * Reset idle exit time since we may cancel the timer
                         * before it can run after the last idle exit time,
                         * to avoid tripping the check in idle exit for a timer
                         * that is trying to run.
                         */
                        pcpu->idle_exit_time = 0;
                }

                flush_work(&freq_scale_down_work);
                if (atomic_dec_return(&active_count) > 0)
                        return 0;

                sysfs_remove_group(cpufreq_global_kobject,
                                &interactive_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                if (policy->max < policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->min, CPUFREQ_RELATION_L);
                break;
        }
        return 0;
}

static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
                                             unsigned long val,
                                             void *data)
{
        switch (val) {
        case IDLE_START:
                cpufreq_interactive_idle_start();
                break;
        case IDLE_END:
                cpufreq_interactive_idle_end();
                break;
        }

        return 0;
}

static struct notifier_block cpufreq_interactive_idle_nb = {
        .notifier_call = cpufreq_interactive_idle_notifier,
};

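/*
 * Module init: apply default tunables, set up each CPU's sampling timer,
 * start the SCHED_FIFO "kinteractiveup" thread used for speed increases,
 * create the workqueue used for speed decreases, hook the idle notifier,
 * and finally register the governor with cpufreq.
 */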
static int __init cpufreq_interactive_init(void)
{
        unsigned int i;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

        go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD;
        min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
        timer_rate = DEFAULT_TIMER_RATE;

        /* Initialize per-cpu timers */
        for_each_possible_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);
                init_timer(&pcpu->cpu_timer);
                pcpu->cpu_timer.function = cpufreq_interactive_timer;
                pcpu->cpu_timer.data = i;
        }

        up_task = kthread_create(cpufreq_interactive_up_task, NULL,
                                 "kinteractiveup");
        if (IS_ERR(up_task))
                return PTR_ERR(up_task);

        sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
        get_task_struct(up_task);

        /*
         * No rescuer thread, bind to CPU queuing the work for possibly
         * warm cache (probably doesn't matter much).
         */
        down_wq = alloc_workqueue("knteractive_down", 0, 1);

        if (!down_wq)
                goto err_freeuptask;

        INIT_WORK(&freq_scale_down_work,
                  cpufreq_interactive_freq_down);

        spin_lock_init(&up_cpumask_lock);
        spin_lock_init(&down_cpumask_lock);
        mutex_init(&set_speed_lock);

        idle_notifier_register(&cpufreq_interactive_idle_nb);

        return cpufreq_register_governor(&cpufreq_gov_interactive);

err_freeuptask:
        put_task_struct(up_task);
        return -ENOMEM;
}

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif

static void __exit cpufreq_interactive_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_interactive);
        kthread_stop(up_task);
        put_task_struct(up_task);
        destroy_workqueue(down_wq);
}

module_exit(cpufreq_interactive_exit);

MODULE_AUTHOR("Mike Chan <mike@android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
        "latency-sensitive workloads");
MODULE_LICENSE("GPL");