cpufreq: set go_maxspeed_load to 85%
/*
 * drivers/cpufreq/cpufreq_interactive.c
 *
 * Copyright (C) 2010 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Author: Mike Chan (mike@android.com)
 *
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include <asm/cputime.h>

static atomic_t active_count = ATOMIC_INIT(0);

struct cpufreq_interactive_cpuinfo {
        struct timer_list cpu_timer;
        int timer_idlecancel;
        u64 time_in_idle;
        u64 time_in_iowait;
        u64 idle_exit_time;
        u64 timer_run_time;
        int idling;
        u64 freq_change_time;
        u64 freq_change_time_in_idle;
        u64 freq_change_time_in_iowait;
        struct cpufreq_policy *policy;
        struct cpufreq_frequency_table *freq_table;
        unsigned int target_freq;
        int governor_enabled;
};

static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);

/* Realtime kthread handles frequency raises; workqueue handles frequency drops */
static struct task_struct *up_task;
static struct workqueue_struct *down_wq;
static struct work_struct freq_scale_down_work;
static cpumask_t up_cpumask;
static spinlock_t up_cpumask_lock;
static cpumask_t down_cpumask;
static spinlock_t down_cpumask_lock;
static struct mutex set_speed_lock;

/* Go to max speed when CPU load is at or above this value. */
#define DEFAULT_GO_MAXSPEED_LOAD 85
static unsigned long go_maxspeed_load;

/* Base of the exponential ramp to max speed; if 0, jump straight to maximum */
static unsigned long boost_factor;

/* Max frequency boost in kHz above the current speed; if 0, no cap is enforced */
static unsigned long max_boost;

/* Consider IO as busy */
static unsigned long io_is_busy;

/*
 * Targeted sustainable load relative to the current frequency.
 * If 0, the target is set relative to the max speed instead.
 */
static unsigned long sustain_load;

/*
 * The minimum amount of time to spend at a frequency before we can ramp down.
 */
#define DEFAULT_MIN_SAMPLE_TIME 30000
static unsigned long min_sample_time;

/*
 * The sample rate of the timer used to increase frequency
 */
#define DEFAULT_TIMER_RATE 10000
static unsigned long timer_rate;

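/*
 * Editorial summary of the code below (no new tunables introduced): the
 * three DEFAULT_* values above are applied in cpufreq_interactive_init(),
 * while boost_factor, max_boost, io_is_busy and sustain_load simply start
 * at 0.  With the zero defaults, crossing go_maxspeed_load jumps straight
 * to policy->max and all I/O wait time is counted as idle until the sysfs
 * knobs are written.
 */
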
static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_interactive = {
        .name = "interactive",
        .governor = cpufreq_governor_interactive,
        .max_transition_latency = 10000000,
        .owner = THIS_MODULE,
};
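
/*
 * Editorial note: max_transition_latency is compared by the cpufreq core
 * against the driver's reported transition latency in nanoseconds, so the
 * value above declines to run this governor on hardware needing more than
 * roughly 10 ms per frequency switch (assumption based on the cpufreq core
 * conventions of this era).
 */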

static unsigned int cpufreq_interactive_get_target(
        int cpu_load, int load_since_change, struct cpufreq_policy *policy)
{
        unsigned int target_freq;

        /*
         * Choose greater of short-term load (since last idle timer
         * started or timer function re-armed itself) or long-term load
         * (since last frequency change).
         */
        if (load_since_change > cpu_load)
                cpu_load = load_since_change;

        if (cpu_load >= go_maxspeed_load) {
                if (!boost_factor)
                        return policy->max;

                target_freq = policy->cur * boost_factor;

                if (max_boost && target_freq > policy->cur + max_boost)
                        target_freq = policy->cur + max_boost;
        } else {
                if (!sustain_load)
                        return policy->max * cpu_load / 100;

                target_freq = policy->cur * cpu_load / sustain_load;
        }

        target_freq = min(target_freq, policy->max);
        return target_freq;
}
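
/*
 * Illustrative arithmetic (editorial example, values assumed): with the
 * default go_maxspeed_load of 85 and boost_factor of 0, a sampled load of
 * 90% returns policy->max directly.  A load of 50% with sustain_load of 0
 * yields policy->max * 50 / 100, i.e. half the maximum frequency, before
 * the caller rounds it to a frequency-table entry.
 */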

static inline cputime64_t get_cpu_iowait_time(
        unsigned int cpu, cputime64_t *wall)
{
        u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

        if (iowait_time == -1ULL)
                return 0;

        return iowait_time;
}

static void cpufreq_interactive_timer(unsigned long data)
{
        unsigned int delta_idle;
        unsigned int delta_iowait;
        unsigned int delta_time;
        int cpu_load;
        int load_since_change;
        u64 time_in_idle;
        u64 time_in_iowait;
        u64 idle_exit_time;
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
        u64 now_idle;
        u64 now_iowait;
        unsigned int new_freq;
        unsigned int index;
        unsigned long flags;

        smp_rmb();

        if (!pcpu->governor_enabled)
                goto exit;

        /*
         * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
         * this lets idle exit know the current idle time sample has
         * been processed, and idle exit can generate a new sample and
         * re-arm the timer.  This prevents a concurrent idle
         * exit on that CPU from writing a new set of info at the same time
         * the timer function runs (the timer function can't use that info
         * until more time passes).
         */
        time_in_idle = pcpu->time_in_idle;
        time_in_iowait = pcpu->time_in_iowait;
        idle_exit_time = pcpu->idle_exit_time;
        now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
        now_iowait = get_cpu_iowait_time(data, NULL);
        smp_wmb();

        /* If we raced with cancelling a timer, skip. */
        if (!idle_exit_time)
                goto exit;

        delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle);
        delta_iowait = (unsigned int) cputime64_sub(now_iowait, time_in_iowait);
        delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
                                                  idle_exit_time);

        /*
         * If timer ran less than 1ms after short-term sample started, retry.
         */
        if (delta_time < 1000)
                goto rearm;

        if (delta_idle > delta_time)
                cpu_load = 0;
        else {
                if (io_is_busy && delta_idle >= delta_iowait)
                        delta_idle -= delta_iowait;

                cpu_load = 100 * (delta_time - delta_idle) / delta_time;
        }

        delta_idle = (unsigned int) cputime64_sub(now_idle,
                                                pcpu->freq_change_time_in_idle);
        delta_iowait = (unsigned int) cputime64_sub(now_iowait,
                                        pcpu->freq_change_time_in_iowait);
        delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
                                                  pcpu->freq_change_time);

        if ((delta_time == 0) || (delta_idle > delta_time))
                load_since_change = 0;
        else {
                if (io_is_busy && delta_idle >= delta_iowait)
                        delta_idle -= delta_iowait;

                load_since_change =
                        100 * (delta_time - delta_idle) / delta_time;
        }

        /*
         * Combine short-term load (since last idle timer started or timer
         * function re-armed itself) and long-term load (since last frequency
         * change) to determine new target frequency
         */
        new_freq = cpufreq_interactive_get_target(cpu_load, load_since_change,
                                                  pcpu->policy);

        if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
                                           new_freq, CPUFREQ_RELATION_H,
                                           &index)) {
                pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
                             (int) data);
                goto rearm;
        }

        new_freq = pcpu->freq_table[index].frequency;

        if (pcpu->target_freq == new_freq)
                goto rearm_if_notmax;

        /*
         * Do not scale down unless we have been at this frequency for the
         * minimum sample time.
         */
        if (new_freq < pcpu->target_freq) {
                if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time)
                    < min_sample_time)
                        goto rearm;
        }

        if (new_freq < pcpu->target_freq) {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&down_cpumask_lock, flags);
                cpumask_set_cpu(data, &down_cpumask);
                spin_unlock_irqrestore(&down_cpumask_lock, flags);
                queue_work(down_wq, &freq_scale_down_work);
        } else {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&up_cpumask_lock, flags);
                cpumask_set_cpu(data, &up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);
                wake_up_process(up_task);
        }

rearm_if_notmax:
        /*
         * Already set max speed and don't see a need to change that,
         * wait until next idle to re-evaluate, don't need timer.
         */
        if (pcpu->target_freq == pcpu->policy->max)
                goto exit;

rearm:
        if (!timer_pending(&pcpu->cpu_timer)) {
                /*
                 * If already at min: if that CPU is idle, don't set timer.
                 * Else cancel the timer if that CPU goes idle.  We don't
                 * need to re-evaluate speed until the next idle exit.
                 */
                if (pcpu->target_freq == pcpu->policy->min) {
                        smp_rmb();

                        if (pcpu->idling)
                                goto exit;

                        pcpu->timer_idlecancel = 1;
                }

                pcpu->time_in_idle = get_cpu_idle_time_us(
                        data, &pcpu->idle_exit_time);
                pcpu->time_in_iowait = get_cpu_iowait_time(
                        data, NULL);

                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }

exit:
        return;
}
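
/*
 * Illustrative numbers (editorial example, not measured data): with the
 * default timer_rate of 10000 us, a sample where delta_time = 10000 and
 * delta_idle = 2000 gives cpu_load = 100 * (10000 - 2000) / 10000 = 80,
 * which is below the default go_maxspeed_load of 85, so the speed is
 * scaled proportionally rather than jumping straight to policy->max.
 */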

static void cpufreq_interactive_idle_start(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());
        int pending;

        if (!pcpu->governor_enabled)
                return;

        pcpu->idling = 1;
        smp_wmb();
        pending = timer_pending(&pcpu->cpu_timer);

        if (pcpu->target_freq != pcpu->policy->min) {
#ifdef CONFIG_SMP
                /*
                 * Entering idle while not at lowest speed.  On some
                 * platforms this can hold the other CPU(s) at that speed
                 * even though the CPU is idle. Set a timer to re-evaluate
                 * speed so this idle CPU doesn't hold the other CPUs above
                 * min indefinitely.  This should probably be a quirk of
                 * the CPUFreq driver.
                 */
                if (!pending) {
                        pcpu->time_in_idle = get_cpu_idle_time_us(
                                smp_processor_id(), &pcpu->idle_exit_time);
                        pcpu->time_in_iowait = get_cpu_iowait_time(
                                smp_processor_id(), NULL);
                        pcpu->timer_idlecancel = 0;
                        mod_timer(&pcpu->cpu_timer,
                                  jiffies + usecs_to_jiffies(timer_rate));
                }
#endif
        } else {
                /*
                 * If at min speed and entering idle after load has
                 * already been evaluated, and a timer has been set just in
                 * case the CPU suddenly goes busy, cancel that timer.  The
                 * CPU didn't go busy; we'll recheck things upon idle exit.
                 */
                if (pending && pcpu->timer_idlecancel) {
                        del_timer(&pcpu->cpu_timer);
                        /*
                         * Ensure last timer run time is after current idle
                         * sample start time, so next idle exit will always
                         * start a new idle sampling period.
                         */
                        pcpu->idle_exit_time = 0;
                        pcpu->timer_idlecancel = 0;
                }
        }
}

static void cpufreq_interactive_idle_end(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());

        pcpu->idling = 0;
        smp_wmb();

        /*
         * Arm the timer for 1-2 ticks later if not already, and if the timer
         * function has already processed the previous load sampling
         * interval.  (If the timer is not pending but has not processed
         * the previous interval, it is probably racing with us on another
         * CPU.  Let it compute load based on the previous sample and then
         * re-arm the timer for another interval when it's done, rather
         * than updating the interval start time to be "now", which doesn't
         * give the timer function enough time to make a decision on this
         * run.)
         */
        if (timer_pending(&pcpu->cpu_timer) == 0 &&
            pcpu->timer_run_time >= pcpu->idle_exit_time &&
            pcpu->governor_enabled) {
                pcpu->time_in_idle =
                        get_cpu_idle_time_us(smp_processor_id(),
                                             &pcpu->idle_exit_time);
                pcpu->time_in_iowait =
                        get_cpu_iowait_time(smp_processor_id(),
                                            NULL);
                pcpu->timer_idlecancel = 0;
                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }
}

static int cpufreq_interactive_up_task(void *data)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                spin_lock_irqsave(&up_cpumask_lock, flags);

                if (cpumask_empty(&up_cpumask)) {
                        spin_unlock_irqrestore(&up_cpumask_lock, flags);
                        schedule();

                        if (kthread_should_stop())
                                break;

                        spin_lock_irqsave(&up_cpumask_lock, flags);
                }

                set_current_state(TASK_RUNNING);
                tmp_mask = up_cpumask;
                cpumask_clear(&up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);

                for_each_cpu(cpu, &tmp_mask) {
                        unsigned int j;
                        unsigned int max_freq = 0;

                        pcpu = &per_cpu(cpuinfo, cpu);
                        smp_rmb();

                        if (!pcpu->governor_enabled)
                                continue;

                        mutex_lock(&set_speed_lock);

                        for_each_cpu(j, pcpu->policy->cpus) {
                                struct cpufreq_interactive_cpuinfo *pjcpu =
                                        &per_cpu(cpuinfo, j);

                                if (pjcpu->target_freq > max_freq)
                                        max_freq = pjcpu->target_freq;
                        }

                        __cpufreq_driver_target(pcpu->policy,
                                                max_freq,
                                                CPUFREQ_RELATION_H);
                        mutex_unlock(&set_speed_lock);

                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(cpu,
                                                     &pcpu->freq_change_time);
                        pcpu->freq_change_time_in_iowait =
                                get_cpu_iowait_time(cpu, NULL);
                }
        }

        return 0;
}
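
/*
 * Editorial note: both the up task above and the scale-down work below
 * program a policy to the highest target_freq requested by any CPU that
 * shares it, so a single busy sibling keeps the shared clock domain at its
 * requested speed and the frequency only drops once every CPU in the
 * policy agrees.
 */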

static void cpufreq_interactive_freq_down(struct work_struct *work)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        spin_lock_irqsave(&down_cpumask_lock, flags);
        tmp_mask = down_cpumask;
        cpumask_clear(&down_cpumask);
        spin_unlock_irqrestore(&down_cpumask_lock, flags);

        for_each_cpu(cpu, &tmp_mask) {
                unsigned int j;
                unsigned int max_freq = 0;

                pcpu = &per_cpu(cpuinfo, cpu);
                smp_rmb();

                if (!pcpu->governor_enabled)
                        continue;

                mutex_lock(&set_speed_lock);

                for_each_cpu(j, pcpu->policy->cpus) {
                        struct cpufreq_interactive_cpuinfo *pjcpu =
                                &per_cpu(cpuinfo, j);

                        if (pjcpu->target_freq > max_freq)
                                max_freq = pjcpu->target_freq;
                }

                __cpufreq_driver_target(pcpu->policy, max_freq,
                                        CPUFREQ_RELATION_H);

                mutex_unlock(&set_speed_lock);
                pcpu->freq_change_time_in_idle =
                        get_cpu_idle_time_us(cpu,
                                             &pcpu->freq_change_time);
                pcpu->freq_change_time_in_iowait =
                        get_cpu_iowait_time(cpu, NULL);
        }
}

static ssize_t show_go_maxspeed_load(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", go_maxspeed_load);
}

static ssize_t store_go_maxspeed_load(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        go_maxspeed_load = val;
        return count;
}

static struct global_attr go_maxspeed_load_attr = __ATTR(go_maxspeed_load, 0644,
                show_go_maxspeed_load, store_go_maxspeed_load);

static ssize_t show_boost_factor(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", boost_factor);
}

static ssize_t store_boost_factor(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        boost_factor = val;
        return count;
}

static struct global_attr boost_factor_attr = __ATTR(boost_factor, 0644,
                show_boost_factor, store_boost_factor);

static ssize_t show_io_is_busy(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", io_is_busy);
}

static ssize_t store_io_is_busy(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        if (!strict_strtoul(buf, 0, &io_is_busy))
                return count;
        return -EINVAL;
}

static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644,
                show_io_is_busy, store_io_is_busy);

static ssize_t show_max_boost(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", max_boost);
}

static ssize_t store_max_boost(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        max_boost = val;
        return count;
}

static struct global_attr max_boost_attr = __ATTR(max_boost, 0644,
                show_max_boost, store_max_boost);

static ssize_t show_sustain_load(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", sustain_load);
}

static ssize_t store_sustain_load(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        sustain_load = val;
        return count;
}

static struct global_attr sustain_load_attr = __ATTR(sustain_load, 0644,
                show_sustain_load, store_sustain_load);

static ssize_t show_min_sample_time(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", min_sample_time);
}

static ssize_t store_min_sample_time(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        min_sample_time = val;
        return count;
}

static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
                show_min_sample_time, store_min_sample_time);

static ssize_t show_timer_rate(struct kobject *kobj,
                        struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", timer_rate);
}

static ssize_t store_timer_rate(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        timer_rate = val;
        return count;
}

static struct global_attr timer_rate_attr = __ATTR(timer_rate, 0644,
                show_timer_rate, store_timer_rate);

static struct attribute *interactive_attributes[] = {
        &go_maxspeed_load_attr.attr,
        &boost_factor_attr.attr,
        &max_boost_attr.attr,
        &io_is_busy_attr.attr,
        &sustain_load_attr.attr,
        &min_sample_time_attr.attr,
        &timer_rate_attr.attr,
        NULL,
};

static struct attribute_group interactive_attr_group = {
        .attrs = interactive_attributes,
        .name = "interactive",
};
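
/*
 * Usage sketch (editorial; path assumes the usual location of the cpufreq
 * global kobject): once the governor is started, the group above appears as
 * /sys/devices/system/cpu/cpufreq/interactive/, so for example
 *   echo 90 > /sys/devices/system/cpu/cpufreq/interactive/go_maxspeed_load
 * raises the jump-to-max threshold from the default of 85.
 */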

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event)
{
        int rc;
        unsigned int j;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct cpufreq_frequency_table *freq_table;

        switch (event) {
        case CPUFREQ_GOV_START:
                if (!cpu_online(policy->cpu))
                        return -EINVAL;

                freq_table =
                        cpufreq_frequency_get_table(policy->cpu);

                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->policy = policy;
                        pcpu->target_freq = policy->cur;
                        pcpu->freq_table = freq_table;
                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(j,
                                             &pcpu->freq_change_time);
                        pcpu->time_in_idle = pcpu->freq_change_time_in_idle;
                        pcpu->idle_exit_time = pcpu->freq_change_time;
                        pcpu->freq_change_time_in_iowait =
                                get_cpu_iowait_time(j, NULL);
                        pcpu->time_in_iowait = pcpu->freq_change_time_in_iowait;

                        pcpu->timer_idlecancel = 1;
                        pcpu->governor_enabled = 1;
                        smp_wmb();

                        if (!timer_pending(&pcpu->cpu_timer))
                                mod_timer(&pcpu->cpu_timer, jiffies + 2);
                }

                /*
                 * Do not register the idle hook and create sysfs
                 * entries if we have already done so.
                 */
                if (atomic_inc_return(&active_count) > 1)
                        return 0;

                rc = sysfs_create_group(cpufreq_global_kobject,
                                &interactive_attr_group);
                if (rc)
                        return rc;

                break;

        case CPUFREQ_GOV_STOP:
                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->governor_enabled = 0;
                        smp_wmb();
                        del_timer_sync(&pcpu->cpu_timer);

                        /*
                         * Reset idle exit time since we may cancel the timer
                         * before it can run after the last idle exit time,
                         * to avoid tripping the check in idle exit for a timer
                         * that is trying to run.
                         */
                        pcpu->idle_exit_time = 0;
                }

                flush_work(&freq_scale_down_work);
                if (atomic_dec_return(&active_count) > 0)
                        return 0;

                sysfs_remove_group(cpufreq_global_kobject,
                                &interactive_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                if (policy->max < policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->min, CPUFREQ_RELATION_L);
                break;
        }
        return 0;
}

static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
                                             unsigned long val,
                                             void *data)
{
        switch (val) {
        case IDLE_START:
                cpufreq_interactive_idle_start();
                break;
        case IDLE_END:
                cpufreq_interactive_idle_end();
                break;
        }

        return 0;
}

static struct notifier_block cpufreq_interactive_idle_nb = {
        .notifier_call = cpufreq_interactive_idle_notifier,
};

static int __init cpufreq_interactive_init(void)
{
        unsigned int i;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

        go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD;
        min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
        timer_rate = DEFAULT_TIMER_RATE;

        /* Initialize per-CPU timers */
        for_each_possible_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);
                init_timer(&pcpu->cpu_timer);
                pcpu->cpu_timer.function = cpufreq_interactive_timer;
                pcpu->cpu_timer.data = i;
        }

        up_task = kthread_create(cpufreq_interactive_up_task, NULL,
                                 "kinteractiveup");
        if (IS_ERR(up_task))
                return PTR_ERR(up_task);

        sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
        get_task_struct(up_task);

        /*
         * No rescuer thread, bind to CPU queuing the work for possibly
         * warm cache (probably doesn't matter much).
         */
        down_wq = alloc_workqueue("kinteractive_down", 0, 1);

        if (!down_wq)
                goto err_freeuptask;

        INIT_WORK(&freq_scale_down_work,
                  cpufreq_interactive_freq_down);

        spin_lock_init(&up_cpumask_lock);
        spin_lock_init(&down_cpumask_lock);
        mutex_init(&set_speed_lock);

        idle_notifier_register(&cpufreq_interactive_idle_nb);

        return cpufreq_register_governor(&cpufreq_gov_interactive);

err_freeuptask:
        put_task_struct(up_task);
        return -ENOMEM;
}

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif

static void __exit cpufreq_interactive_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_interactive);
        kthread_stop(up_task);
        put_task_struct(up_task);
        destroy_workqueue(down_wq);
}

module_exit(cpufreq_interactive_exit);

MODULE_AUTHOR("Mike Chan <mike@android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
        "latency sensitive workloads");
MODULE_LICENSE("GPL");