[CPUFREQ] ondemand/conservative: sanitize sampling_rate restrictions
drivers/cpufreq/cpufreq_conservative.c
/*
 *  drivers/cpufreq/cpufreq_conservative.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *            (C)  2004 Alexander Clouter <alex-kernel@digriz.org.uk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/ctype.h>
#include <linux/cpufreq.h>
#include <linux/sysctl.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/kmod.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
/*
 * dbs is used in this file as shorthand for demand-based switching.
 * It helps to keep variable names smaller and simpler.
 */

#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define DEF_FREQUENCY_DOWN_THRESHOLD            (20)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. Default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL), this governor will not work.
 * All times here are in us (microseconds).
 */
static unsigned int def_sampling_rate;
#define MIN_SAMPLING_RATE_RATIO                 (2)
/* for correct statistics, we need at least 10 ticks between each measure */
#define MIN_STAT_SAMPLING_RATE                  \
                        (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10))
#define MIN_SAMPLING_RATE                       \
                        (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
/* Above MIN_SAMPLING_RATE will vanish with its sysfs file soon.
 * Define the minimal settable sampling rate to the greater of:
 *   - "HW transition latency" * 100 (same as default sampling / 10)
 *   - MIN_STAT_SAMPLING_RATE
 * so that userspace cannot shoot itself in the foot.
 */
static unsigned int minimum_sampling_rate(void)
{
        return max(def_sampling_rate / 10, MIN_STAT_SAMPLING_RATE);
}

/* This will also vanish soon, once sampling_rate_max is removed */
#define MAX_SAMPLING_RATE                       (500 * def_sampling_rate)
#define LATENCY_MULTIPLIER                      (1000)
#define DEF_SAMPLING_DOWN_FACTOR                (1)
#define MAX_SAMPLING_DOWN_FACTOR                (10)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000 * 1000)

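/*
 * Worked example (editorial illustration only, values are assumptions):
 * assuming HZ=250 and a hypothetical driver transition latency of 10 us,
 *   MIN_STAT_SAMPLING_RATE  = 2 * jiffies_to_usecs(10) = 2 * 40000 = 80000 us
 *   def_sampling_rate (set at governor start below)
 *                           = max(10 * 10 * 1000, 80000) = 100000 us (100 ms)
 *   minimum_sampling_rate() = max(100000 / 10, 80000)    = 80000 us  (80 ms)
 * so sampling_rate values written via sysfs are clamped to >= 80 ms here.
 */
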
static void do_dbs_timer(struct work_struct *work);

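/*
 * Per-CPU governor state (descriptive summary added for clarity):
 *  cur_policy         - the cpufreq policy this CPU is managed by
 *  prev_cpu_idle_up   - idle-time snapshot taken at the last "up" evaluation
 *  prev_cpu_idle_down - idle-time snapshot taken at the last "down" evaluation
 *  enable             - non-zero while the governor is active on this CPU
 *  down_skip          - samples seen since the last "down" evaluation
 *  requested_freq     - frequency the governor last asked for; kept in sync
 *                       with external transitions via dbs_cpufreq_notifier()
 */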
struct cpu_dbs_info_s {
        struct cpufreq_policy *cur_policy;
        unsigned int prev_cpu_idle_up;
        unsigned int prev_cpu_idle_down;
        unsigned int enable;
        unsigned int down_skip;
        unsigned int requested_freq;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

/*
 * DEADLOCK ALERT! There is an ordering requirement between cpu_hotplug
 * lock and dbs_mutex. cpu_hotplug lock should always be held before
 * dbs_mutex. If any function that can potentially take cpu_hotplug lock
 * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then
 * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
 * is recursive for the same process. -Venki
 */
static DEFINE_MUTEX(dbs_mutex);
static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer);

struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int sampling_down_factor;
        unsigned int up_threshold;
        unsigned int down_threshold;
        unsigned int ignore_nice;
        unsigned int freq_step;
};

static struct dbs_tuners dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD,
        .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
        .ignore_nice = 0,
        .freq_step = 5,
};

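/*
 * Sum of this CPU's cumulative idle and iowait cputime counters (plus nice
 * time when ignore_nice is set), as reported by kernel_stat.  The governor
 * only ever looks at deltas between successive samples of this value.
 */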
static inline unsigned int get_cpu_idle_time(unsigned int cpu)
{
        unsigned int add_nice = 0, ret;

        if (dbs_tuners_ins.ignore_nice)
                add_nice = kstat_cpu(cpu).cpustat.nice;

        ret = kstat_cpu(cpu).cpustat.idle +
                kstat_cpu(cpu).cpustat.iowait +
                add_nice;

        return ret;
}

/* keep track of frequency transitions */
static int
dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                     void *data)
{
        struct cpufreq_freqs *freq = data;
        struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info,
                                                        freq->cpu);

        if (!this_dbs_info->enable)
                return 0;

        this_dbs_info->requested_freq = freq->new;

        return 0;
}

static struct notifier_block dbs_cpufreq_notifier_block = {
        .notifier_call = dbs_cpufreq_notifier
};

/************************** sysfs interface ************************/
static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
{
        static int print_once;

        if (!print_once) {
                printk(KERN_INFO "CPUFREQ: conservative sampling_rate_max "
                       "sysfs file is deprecated - used by: %s\n",
                       current->comm);
                print_once = 1;
        }
        return sprintf(buf, "%u\n", MAX_SAMPLING_RATE);
}

static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf)
{
        static int print_once;

        if (!print_once) {
                printk(KERN_INFO "CPUFREQ: conservative sampling_rate_min "
                       "sysfs file is deprecated - used by: %s\n", current->comm);
                print_once = 1;
        }
        return sprintf(buf, "%u\n", MIN_SAMPLING_RATE);
}

#define define_one_ro(_name)                            \
static struct freq_attr _name =                         \
__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(sampling_rate_max);
define_one_ro(sampling_rate_min);

/* cpufreq_conservative Governor Tunables */
#define show_one(file_name, object)                                     \
static ssize_t show_##file_name                                         \
(struct cpufreq_policy *unused, char *buf)                              \
{                                                                       \
        return sprintf(buf, "%u\n", dbs_tuners_ins.object);             \
}
show_one(sampling_rate, sampling_rate);
show_one(sampling_down_factor, sampling_down_factor);
show_one(up_threshold, up_threshold);
show_one(down_threshold, down_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(freq_step, freq_step);

static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);
        if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
                return -EINVAL;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.sampling_down_factor = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        mutex_lock(&dbs_mutex);
        if (ret != 1) {
                mutex_unlock(&dbs_mutex);
                return -EINVAL;
        }
        dbs_tuners_ins.sampling_rate = max(input, minimum_sampling_rate());
        mutex_unlock(&dbs_mutex);

        return count;
}

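/*
 * Note (editorial): writes below minimum_sampling_rate() are silently raised
 * rather than rejected.  With the example numbers assumed earlier, writing
 * 5000 to the sampling_rate sysfs file would yield an effective 80000 us.
 */
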
static ssize_t store_up_threshold(struct cpufreq_policy *unused,
                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        mutex_lock(&dbs_mutex);
        if (ret != 1 || input > 100 ||
            input <= dbs_tuners_ins.down_threshold) {
                mutex_unlock(&dbs_mutex);
                return -EINVAL;
        }

        dbs_tuners_ins.up_threshold = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_down_threshold(struct cpufreq_policy *unused,
                const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        mutex_lock(&dbs_mutex);
        if (ret != 1 || input > 100 || input >= dbs_tuners_ins.up_threshold) {
                mutex_unlock(&dbs_mutex);
                return -EINVAL;
        }

        dbs_tuners_ins.down_threshold = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
                const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        unsigned int j;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        if (input > 1)
                input = 1;

        mutex_lock(&dbs_mutex);
        if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
                mutex_unlock(&dbs_mutex);
                return count;
        }
        dbs_tuners_ins.ignore_nice = input;

        /* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *j_dbs_info;
                j_dbs_info = &per_cpu(cpu_dbs_info, j);
                j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
                j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
        }
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_freq_step(struct cpufreq_policy *policy,
                const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);

        if (ret != 1)
                return -EINVAL;

        if (input > 100)
                input = 100;

        /* no need to test here if freq_step is zero as the user might actually
         * want this, they would be crazy though :) */
        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.freq_step = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(sampling_rate);
define_one_rw(sampling_down_factor);
define_one_rw(up_threshold);
define_one_rw(down_threshold);
define_one_rw(ignore_nice_load);
define_one_rw(freq_step);

static struct attribute *dbs_attributes[] = {
        &sampling_rate_max.attr,
        &sampling_rate_min.attr,
        &sampling_rate.attr,
        &sampling_down_factor.attr,
        &up_threshold.attr,
        &down_threshold.attr,
        &ignore_nice_load.attr,
        &freq_step.attr,
        NULL
};

static struct attribute_group dbs_attr_group = {
        .attrs = dbs_attributes,
        .name = "conservative",
};
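
/*
 * Illustrative usage (editorial example; exact paths depend on the platform):
 * these attributes appear under each policy's kobject once the governor is
 * active, typically e.g.
 *
 *   # cat /sys/devices/system/cpu/cpu0/cpufreq/conservative/sampling_rate
 *   # echo 40 > /sys/devices/system/cpu/cpu0/cpufreq/conservative/down_threshold
 *   # echo 1 > /sys/devices/system/cpu/cpu0/cpufreq/conservative/ignore_nice_load
 */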

/************************** sysfs end ************************/

static void dbs_check_cpu(int cpu)
{
        unsigned int idle_ticks, up_idle_ticks, down_idle_ticks;
        unsigned int tmp_idle_ticks, total_idle_ticks;
        unsigned int freq_target;
        unsigned int freq_down_sampling_rate;
        struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
        struct cpufreq_policy *policy;

        if (!this_dbs_info->enable)
                return;

        policy = this_dbs_info->cur_policy;

        /*
         * The default safe range is 20% to 80%
         * Every sampling_rate, we check
         *      - If current idle time is less than 20%, then we try to
         *        increase frequency
         * Every sampling_rate*sampling_down_factor, we check
         *      - If current idle time is more than 80%, then we try to
         *        decrease frequency
         *
         * Any frequency increase takes it to the maximum frequency.
         * Frequency reduction happens at minimum steps of
         * 5% (default) of max_frequency
         */

        /* Check for frequency increase */
        idle_ticks = UINT_MAX;

        /* Check for frequency increase */
        total_idle_ticks = get_cpu_idle_time(cpu);
        tmp_idle_ticks = total_idle_ticks -
                this_dbs_info->prev_cpu_idle_up;
        this_dbs_info->prev_cpu_idle_up = total_idle_ticks;

        if (tmp_idle_ticks < idle_ticks)
                idle_ticks = tmp_idle_ticks;

        /* Scale idle ticks by 100 and compare with up and down ticks */
        idle_ticks *= 100;
        up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
                        usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

        if (idle_ticks < up_idle_ticks) {
                this_dbs_info->down_skip = 0;
                this_dbs_info->prev_cpu_idle_down =
                        this_dbs_info->prev_cpu_idle_up;

                /* if we are already at full speed then break out early */
                if (this_dbs_info->requested_freq == policy->max)
                        return;

                freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

                /* max freq cannot be less than 100. But who knows.... */
                if (unlikely(freq_target == 0))
                        freq_target = 5;

                this_dbs_info->requested_freq += freq_target;
                if (this_dbs_info->requested_freq > policy->max)
                        this_dbs_info->requested_freq = policy->max;

                __cpufreq_driver_target(policy, this_dbs_info->requested_freq,
                        CPUFREQ_RELATION_H);
                return;
        }

        /* Check for frequency decrease */
        this_dbs_info->down_skip++;
        if (this_dbs_info->down_skip < dbs_tuners_ins.sampling_down_factor)
                return;

        /* Check for frequency decrease */
        total_idle_ticks = this_dbs_info->prev_cpu_idle_up;
        tmp_idle_ticks = total_idle_ticks -
                this_dbs_info->prev_cpu_idle_down;
        this_dbs_info->prev_cpu_idle_down = total_idle_ticks;

        if (tmp_idle_ticks < idle_ticks)
                idle_ticks = tmp_idle_ticks;

        /* Scale idle ticks by 100 and compare with up and down ticks */
        idle_ticks *= 100;
        this_dbs_info->down_skip = 0;

        freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
                dbs_tuners_ins.sampling_down_factor;
        down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) *
                usecs_to_jiffies(freq_down_sampling_rate);

        if (idle_ticks > down_idle_ticks) {
                /*
                 * if we are already at the lowest speed then break out early
                 * or if we 'cannot' reduce the speed as the user might want
                 * freq_target to be zero
                 */
                if (this_dbs_info->requested_freq == policy->min
                                || dbs_tuners_ins.freq_step == 0)
                        return;

                freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100;

                /* max freq cannot be less than 100. But who knows.... */
                if (unlikely(freq_target == 0))
                        freq_target = 5;

                this_dbs_info->requested_freq -= freq_target;
                if (this_dbs_info->requested_freq < policy->min)
                        this_dbs_info->requested_freq = policy->min;

                __cpufreq_driver_target(policy, this_dbs_info->requested_freq,
                                CPUFREQ_RELATION_H);
                return;
        }
}
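
/*
 * Worked example of the up-threshold check in dbs_check_cpu() above
 * (editorial, assuming HZ=250 and sampling_rate = 80000 us, so one sample
 * spans usecs_to_jiffies(80000) = 20 ticks): with up_threshold = 80,
 * up_idle_ticks = (100 - 80) * 20 = 400.  idle_ticks is the idle delta in
 * ticks scaled by 100, so the frequency is raised whenever the CPU was idle
 * for fewer than 4 of the last 20 ticks, i.e. less than 20% idle.
 */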

static void do_dbs_timer(struct work_struct *work)
{
        int i;
        mutex_lock(&dbs_mutex);
        for_each_online_cpu(i)
                dbs_check_cpu(i);
        schedule_delayed_work(&dbs_work,
                        usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
        mutex_unlock(&dbs_mutex);
}

static inline void dbs_timer_init(void)
{
        init_timer_deferrable(&dbs_work.timer);
        schedule_delayed_work(&dbs_work,
                        usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
        return;
}

static inline void dbs_timer_exit(void)
{
        cancel_delayed_work(&dbs_work);
        return;
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                   unsigned int event)
{
        unsigned int cpu = policy->cpu;
        struct cpu_dbs_info_s *this_dbs_info;
        unsigned int j;
        int rc;

        this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

        switch (event) {
        case CPUFREQ_GOV_START:
                if ((!cpu_online(cpu)) || (!policy->cur))
                        return -EINVAL;

                if (this_dbs_info->enable) /* Already enabled */
                        break;

                mutex_lock(&dbs_mutex);

                rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
                if (rc) {
                        mutex_unlock(&dbs_mutex);
                        return rc;
                }

                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_info_s *j_dbs_info;
                        j_dbs_info = &per_cpu(cpu_dbs_info, j);
                        j_dbs_info->cur_policy = policy;

                        j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu);
                        j_dbs_info->prev_cpu_idle_down
                                = j_dbs_info->prev_cpu_idle_up;
                }
                this_dbs_info->enable = 1;
                this_dbs_info->down_skip = 0;
                this_dbs_info->requested_freq = policy->cur;

                dbs_enable++;
                /*
                 * Start the timer/schedule work when this governor
                 * is used for the first time.
                 */
                if (dbs_enable == 1) {
                        unsigned int latency;
                        /* policy latency is in ns. Convert it to us first */
                        latency = policy->cpuinfo.transition_latency / 1000;
                        if (latency == 0)
                                latency = 1;

                        def_sampling_rate =
                                max(10 * latency * LATENCY_MULTIPLIER,
                                    MIN_STAT_SAMPLING_RATE);

                        dbs_tuners_ins.sampling_rate = def_sampling_rate;

                        dbs_timer_init();
                        cpufreq_register_notifier(
                                        &dbs_cpufreq_notifier_block,
                                        CPUFREQ_TRANSITION_NOTIFIER);
                }

                mutex_unlock(&dbs_mutex);
                break;

        case CPUFREQ_GOV_STOP:
                mutex_lock(&dbs_mutex);
                this_dbs_info->enable = 0;
                sysfs_remove_group(&policy->kobj, &dbs_attr_group);
                dbs_enable--;
                /*
                 * Stop the timer/schedule work when the last CPU stops
                 * using this governor.
                 */
                if (dbs_enable == 0) {
                        dbs_timer_exit();
                        cpufreq_unregister_notifier(
                                        &dbs_cpufreq_notifier_block,
                                        CPUFREQ_TRANSITION_NOTIFIER);
                }

                mutex_unlock(&dbs_mutex);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&dbs_mutex);
                if (policy->max < this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(
                                        this_dbs_info->cur_policy,
                                        policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(
                                        this_dbs_info->cur_policy,
                                        policy->min, CPUFREQ_RELATION_L);
                mutex_unlock(&dbs_mutex);
                break;
        }
        return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
static
#endif
struct cpufreq_governor cpufreq_gov_conservative = {
        .name                   = "conservative",
        .governor               = cpufreq_governor_dbs,
        .max_transition_latency = TRANSITION_LATENCY_LIMIT,
        .owner                  = THIS_MODULE,
};

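/*
 * Illustrative usage (editorial example): once this governor is registered,
 * it can be selected per policy from userspace through the cpufreq core, e.g.
 *
 *   # echo conservative > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 */
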
static int __init cpufreq_gov_dbs_init(void)
{
        return cpufreq_register_governor(&cpufreq_gov_conservative);
}

static void __exit cpufreq_gov_dbs_exit(void)
{
        /* Make sure that the scheduled work is indeed not running */
        flush_scheduled_work();

        cpufreq_unregister_governor(&cpufreq_gov_conservative);
}


MODULE_AUTHOR("Alexander Clouter <alex-kernel@digriz.org.uk>");
MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
                "Low Latency Frequency Transition capable processors "
                "optimised for use in a battery environment");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);