cpufreq: interactive: Add error checking on sysfs interfaces
/*
 * drivers/cpufreq/cpufreq_interactive.c
 *
 * Copyright (C) 2010 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Author: Mike Chan (mike@android.com)
 *
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include <asm/cputime.h>

static atomic_t active_count = ATOMIC_INIT(0);

struct cpufreq_interactive_cpuinfo {
        struct timer_list cpu_timer;
        int timer_idlecancel;
        u64 time_in_idle;
        u64 idle_exit_time;
        u64 timer_run_time;
        int idling;
        u64 freq_change_time;
        u64 freq_change_time_in_idle;
        struct cpufreq_policy *policy;
        struct cpufreq_frequency_table *freq_table;
        unsigned int target_freq;
        int governor_enabled;
};

static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);

/* A realtime kthread handles scaling speed up; a workqueue handles scaling down */
static struct task_struct *up_task;
static struct workqueue_struct *down_wq;
static struct work_struct freq_scale_down_work;
static cpumask_t up_cpumask;
static spinlock_t up_cpumask_lock;
static cpumask_t down_cpumask;
static spinlock_t down_cpumask_lock;

/* Go to max speed when CPU load at or above this value. */
#define DEFAULT_GO_MAXSPEED_LOAD 85
static unsigned long go_maxspeed_load;

/*
 * The minimum amount of time to spend at a frequency before we can ramp down.
 */
#define DEFAULT_MIN_SAMPLE_TIME 80000
static unsigned long min_sample_time;

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_interactive = {
        .name = "interactive",
        .governor = cpufreq_governor_interactive,
        .max_transition_latency = 10000000,
        .owner = THIS_MODULE,
};
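
/*
 * Usage note (illustrative, not part of the original source): once
 * registered, this governor is selected per CPU from userspace via the
 * standard cpufreq sysfs interface, e.g.:
 *
 *   echo interactive > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 */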

static void cpufreq_interactive_timer(unsigned long data)
{
        unsigned int delta_idle;
        unsigned int delta_time;
        int cpu_load;
        int load_since_change;
        u64 time_in_idle;
        u64 idle_exit_time;
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
        u64 now_idle;
        unsigned int new_freq;
        unsigned int index;
        unsigned long flags;

        smp_rmb();

        if (!pcpu->governor_enabled)
                goto exit;

        /*
         * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
         * this lets idle exit know the current idle time sample has
         * been processed, and idle exit can generate a new sample and
         * re-arm the timer.  This prevents a concurrent idle
         * exit on that CPU from writing a new set of info at the same time
         * the timer function runs (the timer function can't use that info
         * until more time passes).
         */
        time_in_idle = pcpu->time_in_idle;
        idle_exit_time = pcpu->idle_exit_time;
        now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
        smp_wmb();

        /* If we raced with cancelling a timer, skip. */
        if (!idle_exit_time)
                goto exit;

        delta_idle = (unsigned int) cputime64_sub(now_idle, time_in_idle);
        delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
                                                  idle_exit_time);

        /*
         * If timer ran less than 1ms after short-term sample started, retry.
         */
        if (delta_time < 1000)
                goto rearm;

        if (delta_idle > delta_time)
                cpu_load = 0;
        else
                cpu_load = 100 * (delta_time - delta_idle) / delta_time;

        delta_idle = (unsigned int) cputime64_sub(now_idle,
                                                  pcpu->freq_change_time_in_idle);
        delta_time = (unsigned int) cputime64_sub(pcpu->timer_run_time,
                                                  pcpu->freq_change_time);

        if (delta_idle > delta_time)
                load_since_change = 0;
        else
                load_since_change =
                        100 * (delta_time - delta_idle) / delta_time;

        /*
         * Choose greater of short-term load (since last idle timer
         * started or timer function re-armed itself) or long-term load
         * (since last frequency change).
         */
        if (load_since_change > cpu_load)
                cpu_load = load_since_change;

        if (cpu_load >= go_maxspeed_load)
                new_freq = pcpu->policy->max;
        else
                new_freq = pcpu->policy->max * cpu_load / 100;
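
        /*
         * Worked example (illustrative): if delta_time is 20000 us and
         * delta_idle is 5000 us, cpu_load = 100 * 15000 / 20000 = 75.
         * With the default go_maxspeed_load of 85, 75 < 85, so new_freq
         * becomes policy->max * 75 / 100; the table lookup below then
         * maps that to the highest supported frequency at or below it
         * (CPUFREQ_RELATION_H).
         */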

        if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
                                           new_freq, CPUFREQ_RELATION_H,
                                           &index)) {
                pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
                             (int) data);
                goto rearm;
        }

        new_freq = pcpu->freq_table[index].frequency;

        if (pcpu->target_freq == new_freq)
                goto rearm_if_notmax;

        /*
         * Do not scale down unless we have been at this frequency for the
         * minimum sample time.
         */
        if (new_freq < pcpu->target_freq) {
                if (cputime64_sub(pcpu->timer_run_time, pcpu->freq_change_time) <
                    min_sample_time)
                        goto rearm;
        }
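
        /*
         * For illustration: with the default min_sample_time of 80000 us,
         * a request to lower speed within 80 ms of the last frequency
         * change is deferred; the timer is re-armed and load is sampled
         * again instead.
         */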

        if (new_freq < pcpu->target_freq) {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&down_cpumask_lock, flags);
                cpumask_set_cpu(data, &down_cpumask);
                spin_unlock_irqrestore(&down_cpumask_lock, flags);
                queue_work(down_wq, &freq_scale_down_work);
        } else {
                pcpu->target_freq = new_freq;
                spin_lock_irqsave(&up_cpumask_lock, flags);
                cpumask_set_cpu(data, &up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);
                wake_up_process(up_task);
        }

rearm_if_notmax:
        /*
         * Already set max speed and don't see a need to change that,
         * wait until next idle to re-evaluate, don't need timer.
         */
        if (pcpu->target_freq == pcpu->policy->max)
                goto exit;

rearm:
        if (!timer_pending(&pcpu->cpu_timer)) {
                /*
                 * If already at min: if that CPU is idle, don't set timer.
                 * Else cancel the timer if that CPU goes idle.  We don't
                 * need to re-evaluate speed until the next idle exit.
                 */
                if (pcpu->target_freq == pcpu->policy->min) {
                        smp_rmb();

                        if (pcpu->idling)
                                goto exit;

                        pcpu->timer_idlecancel = 1;
                }

                pcpu->time_in_idle = get_cpu_idle_time_us(
                        data, &pcpu->idle_exit_time);
                mod_timer(&pcpu->cpu_timer, jiffies + 2);
        }

exit:
        return;
}

static void cpufreq_interactive_idle_start(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());
        int pending;

        if (!pcpu->governor_enabled)
                return;

        pcpu->idling = 1;
        smp_wmb();
        pending = timer_pending(&pcpu->cpu_timer);

        if (pcpu->target_freq != pcpu->policy->min) {
#ifdef CONFIG_SMP
                /*
                 * Entering idle while not at lowest speed.  On some
                 * platforms this can hold the other CPU(s) at that speed
                 * even though the CPU is idle. Set a timer to re-evaluate
                 * speed so this idle CPU doesn't hold the other CPUs above
                 * min indefinitely.  This should probably be a quirk of
                 * the CPUFreq driver.
                 */
                if (!pending) {
                        pcpu->time_in_idle = get_cpu_idle_time_us(
                                smp_processor_id(), &pcpu->idle_exit_time);
                        pcpu->timer_idlecancel = 0;
                        mod_timer(&pcpu->cpu_timer, jiffies + 2);
                }
#endif
        } else {
                /*
                 * If at min speed and entering idle after load has
                 * already been evaluated, and a timer has been set just in
                 * case the CPU suddenly goes busy, cancel that timer.  The
                 * CPU didn't go busy; we'll recheck things upon idle exit.
                 */
                if (pending && pcpu->timer_idlecancel) {
                        del_timer(&pcpu->cpu_timer);
                        /*
                         * Ensure last timer run time is after current idle
                         * sample start time, so next idle exit will always
                         * start a new idle sampling period.
                         */
                        pcpu->idle_exit_time = 0;
                        pcpu->timer_idlecancel = 0;
                }
        }
}

static void cpufreq_interactive_idle_end(void)
{
        struct cpufreq_interactive_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());

        pcpu->idling = 0;
        smp_wmb();

        /*
         * Arm the timer for 1-2 ticks later if not already, and if the timer
         * function has already processed the previous load sampling
         * interval.  (If the timer is not pending but has not processed
         * the previous interval, it is probably racing with us on another
         * CPU.  Let it compute load based on the previous sample and then
         * re-arm the timer for another interval when it's done, rather
         * than updating the interval start time to be "now", which doesn't
         * give the timer function enough time to make a decision on this
         * run.)
         */
        if (!timer_pending(&pcpu->cpu_timer) &&
            pcpu->timer_run_time >= pcpu->idle_exit_time &&
            pcpu->governor_enabled) {
                pcpu->time_in_idle =
                        get_cpu_idle_time_us(smp_processor_id(),
                                             &pcpu->idle_exit_time);
                pcpu->timer_idlecancel = 0;
                mod_timer(&pcpu->cpu_timer, jiffies + 2);
        }
}

static int cpufreq_interactive_up_task(void *data)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                spin_lock_irqsave(&up_cpumask_lock, flags);

                if (cpumask_empty(&up_cpumask)) {
                        spin_unlock_irqrestore(&up_cpumask_lock, flags);
                        schedule();

                        if (kthread_should_stop())
                                break;

                        spin_lock_irqsave(&up_cpumask_lock, flags);
                }

                set_current_state(TASK_RUNNING);

                tmp_mask = up_cpumask;
                cpumask_clear(&up_cpumask);
                spin_unlock_irqrestore(&up_cpumask_lock, flags);

                for_each_cpu(cpu, &tmp_mask) {
                        pcpu = &per_cpu(cpuinfo, cpu);

                        smp_rmb();

                        if (!pcpu->governor_enabled)
                                continue;

                        __cpufreq_driver_target(pcpu->policy,
                                                pcpu->target_freq,
                                                CPUFREQ_RELATION_H);
                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(cpu,
                                                     &pcpu->freq_change_time);
                }
        }

        return 0;
}

static void cpufreq_interactive_freq_down(struct work_struct *work)
{
        unsigned int cpu;
        cpumask_t tmp_mask;
        unsigned long flags;
        struct cpufreq_interactive_cpuinfo *pcpu;

        spin_lock_irqsave(&down_cpumask_lock, flags);
        tmp_mask = down_cpumask;
        cpumask_clear(&down_cpumask);
        spin_unlock_irqrestore(&down_cpumask_lock, flags);

        for_each_cpu(cpu, &tmp_mask) {
                pcpu = &per_cpu(cpuinfo, cpu);

                smp_rmb();

                if (!pcpu->governor_enabled)
                        continue;

                __cpufreq_driver_target(pcpu->policy,
                                        pcpu->target_freq,
                                        CPUFREQ_RELATION_H);
                pcpu->freq_change_time_in_idle =
                        get_cpu_idle_time_us(cpu,
                                             &pcpu->freq_change_time);
        }
}
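
/*
 * Design note: raising speed is latency sensitive, so up transitions are
 * driven from up_task, a SCHED_FIFO kthread (see cpufreq_interactive_init)
 * that can preempt most other work once woken.  Lowering speed is not
 * urgent, so down transitions go through an ordinary workqueue.
 */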

static ssize_t show_go_maxspeed_load(struct kobject *kobj,
                                     struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", go_maxspeed_load);
}

static ssize_t store_go_maxspeed_load(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        go_maxspeed_load = val;
        return count;
}
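
/*
 * With the strict_strtoul() check in the store handlers, a malformed
 * write (e.g. "echo garbage > go_maxspeed_load") now fails with -EINVAL
 * rather than being silently misparsed.
 */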

static struct global_attr go_maxspeed_load_attr = __ATTR(go_maxspeed_load, 0644,
                show_go_maxspeed_load, store_go_maxspeed_load);

static ssize_t show_min_sample_time(struct kobject *kobj,
                                struct attribute *attr, char *buf)
{
        return sprintf(buf, "%lu\n", min_sample_time);
}

static ssize_t store_min_sample_time(struct kobject *kobj,
                        struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;

        ret = strict_strtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        min_sample_time = val;
        return count;
}

static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
                show_min_sample_time, store_min_sample_time);

static struct attribute *interactive_attributes[] = {
        &go_maxspeed_load_attr.attr,
        &min_sample_time_attr.attr,
        NULL,
};

static struct attribute_group interactive_attr_group = {
        .attrs = interactive_attributes,
        .name = "interactive",
};
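
/*
 * The group above is created under cpufreq_global_kobject, so on a
 * typical system the tunables appear as:
 *
 *   /sys/devices/system/cpu/cpufreq/interactive/go_maxspeed_load
 *   /sys/devices/system/cpu/cpufreq/interactive/min_sample_time
 *
 * e.g. "echo 90 > go_maxspeed_load" raises the load threshold at which
 * the governor jumps straight to policy->max.
 */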

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
                unsigned int event)
{
        int rc;
        unsigned int j;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct cpufreq_frequency_table *freq_table;

        switch (event) {
        case CPUFREQ_GOV_START:
                if (!cpu_online(policy->cpu))
                        return -EINVAL;

                freq_table =
                        cpufreq_frequency_get_table(policy->cpu);

                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->policy = policy;
                        pcpu->target_freq = policy->cur;
                        pcpu->freq_table = freq_table;
                        pcpu->freq_change_time_in_idle =
                                get_cpu_idle_time_us(j,
                                             &pcpu->freq_change_time);
                        pcpu->governor_enabled = 1;
                        smp_wmb();
                }

                /*
                 * Do not register the idle hook and create sysfs
                 * entries if we have already done so.
                 */
                if (atomic_inc_return(&active_count) > 1)
                        return 0;

                rc = sysfs_create_group(cpufreq_global_kobject,
                                &interactive_attr_group);
                if (rc)
                        return rc;

                break;

        case CPUFREQ_GOV_STOP:
                for_each_cpu(j, policy->cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->governor_enabled = 0;
                        smp_wmb();
                        del_timer_sync(&pcpu->cpu_timer);

                        /*
                         * Reset idle exit time since we may cancel the timer
                         * before it can run after the last idle exit time,
                         * to avoid tripping the check in idle exit for a timer
                         * that is trying to run.
                         */
                        pcpu->idle_exit_time = 0;
                }

                flush_work(&freq_scale_down_work);
                if (atomic_dec_return(&active_count) > 0)
                        return 0;

                sysfs_remove_group(cpufreq_global_kobject,
                                &interactive_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                if (policy->max < policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > policy->cur)
                        __cpufreq_driver_target(policy,
                                        policy->min, CPUFREQ_RELATION_L);
                break;
        }
        return 0;
}
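
/*
 * Note: the cpufreq core invokes this callback with CPUFREQ_GOV_START and
 * CPUFREQ_GOV_STOP when the governor is attached to or detached from a
 * policy (e.g. when userspace writes scaling_governor), and with
 * CPUFREQ_GOV_LIMITS when policy->min or policy->max change.
 */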

static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
                                             unsigned long val,
                                             void *data)
{
        switch (val) {
        case IDLE_START:
                cpufreq_interactive_idle_start();
                break;
        case IDLE_END:
                cpufreq_interactive_idle_end();
                break;
        }

        return 0;
}

static struct notifier_block cpufreq_interactive_idle_nb = {
        .notifier_call = cpufreq_interactive_idle_notifier,
};

static int __init cpufreq_interactive_init(void)
{
        unsigned int i;
        struct cpufreq_interactive_cpuinfo *pcpu;
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

        go_maxspeed_load = DEFAULT_GO_MAXSPEED_LOAD;
        min_sample_time = DEFAULT_MIN_SAMPLE_TIME;

        /* Initialize per-cpu timers */
        for_each_possible_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);
                init_timer(&pcpu->cpu_timer);
                pcpu->cpu_timer.function = cpufreq_interactive_timer;
                pcpu->cpu_timer.data = i;
        }

        up_task = kthread_create(cpufreq_interactive_up_task, NULL,
                                 "kinteractiveup");
        if (IS_ERR(up_task))
                return PTR_ERR(up_task);

        sched_setscheduler_nocheck(up_task, SCHED_FIFO, &param);
        get_task_struct(up_task);

        /*
         * No rescuer thread, bind to CPU queuing the work for possibly
         * warm cache (probably doesn't matter much).
         */
        down_wq = alloc_workqueue("kinteractive_down", 0, 1);
        if (!down_wq)
                goto err_freeuptask;

        INIT_WORK(&freq_scale_down_work,
                  cpufreq_interactive_freq_down);

        spin_lock_init(&up_cpumask_lock);
        spin_lock_init(&down_cpumask_lock);

        idle_notifier_register(&cpufreq_interactive_idle_nb);

        return cpufreq_register_governor(&cpufreq_gov_interactive);

err_freeuptask:
        put_task_struct(up_task);
        return -ENOMEM;
}

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif

static void __exit cpufreq_interactive_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_interactive);
        kthread_stop(up_task);
        put_task_struct(up_task);
        destroy_workqueue(down_wq);
}

module_exit(cpufreq_interactive_exit);

MODULE_AUTHOR("Mike Chan <mike@android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
        "latency-sensitive workloads");
MODULE_LICENSE("GPL");