da98362e5f24b31af7d1daeb442dd874fb433a7f
[linux-2.6.git] / drivers / cpuquiet / governors / balanced.c
1 /*
2  * Copyright (c) 2012 NVIDIA CORPORATION.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; version 2 of the License.
7  *
8  * This program is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along
14  * with this program; if not, write to the Free Software Foundation, Inc.,
15  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
16  *
17  */
18
#include <linux/kernel.h>
#include <linux/cpuquiet.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/cpufreq.h>
#include <linux/pm_qos_params.h>
#include <linux/jiffies.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <asm/cputime.h>
31
32 #define CPUNAMELEN 8
33
/*
 * Classification of how evenly load is spread over the online CPUs,
 * as computed by balanced_speed_balance().
 */
typedef enum {
        CPU_SPEED_BALANCED,     /* work func tries to bring one more CPU online */
        CPU_SPEED_BIASED,       /* work func leaves the CPU set unchanged */
        CPU_SPEED_SKEWED,       /* work func tries to take the slowest CPU offline */
} CPU_SPEED_BALANCE;
39
/*
 * Governor state machine. Transitions are driven by the cpufreq
 * transition notifier; balanced_stop() forces the state back to IDLE.
 */
typedef enum {
        IDLE,   /* work func does nothing and does not re-queue itself */
        DOWN,   /* last seen frequency was <= idle_bottom_freq */
        UP,     /* last seen frequency was > idle_top_freq */
} BALANCED_STATE;
45
/*
 * Two consecutive idle-time samples for one CPU. The "current" pair is
 * filled by get_cpu_idle_time_us(); the "last" pair is the previous
 * "current" pair, saved before each new sample is taken.
 */
struct idle_info {
        u64 idle_last;          /* idle time at the previous sample */
        u64 last_timestamp;     /* timestamp of the previous sample */
        u64 idle_current;       /* idle time at the latest sample */
        u64 timestamp;          /* timestamp of the latest sample */
};
52
/* Per-CPU idle samples and the load percentage (0..100) derived from them. */
static DEFINE_PER_CPU(struct idle_info, idleinfo);
static DEFINE_PER_CPU(unsigned int, cpu_load);

/* Load sampling timer; load_timer_active gates both arming and the handler. */
static struct timer_list load_timer;
static bool load_timer_active;

/* configurable parameters and governor state */
static unsigned int  balance_level = 60;        /* % of the highest load */
static unsigned int  idle_bottom_freq;          /* at or below: enter DOWN */
static unsigned int  idle_top_freq;             /* above: enter UP */
static unsigned long up_delay;                  /* jiffies */
static unsigned long down_delay;                /* jiffies */
static unsigned long last_change_time;          /* jiffies of last CPU wake/quiesce */
static unsigned int  load_sample_rate = 20; /* msec */
static struct workqueue_struct *balanced_wq;
static struct delayed_work balanced_work;
static BALANCED_STATE balanced_state;
static struct kobject *balanced_kobject;

71
72 static void calculate_load_timer(unsigned long data)
73 {
74         int i;
75         u64 idle_time, elapsed_time;
76
77         if (!load_timer_active)
78                 return;
79
80         for_each_online_cpu(i) {
81                 struct idle_info *iinfo = &per_cpu(idleinfo, i);
82                 unsigned int *load = &per_cpu(cpu_load, i);
83
84                 iinfo->idle_last = iinfo->idle_current;
85                 iinfo->last_timestamp = iinfo->timestamp;
86                 iinfo->idle_current =
87                         get_cpu_idle_time_us(i, &iinfo->timestamp);
88                 elapsed_time = iinfo->timestamp - iinfo->last_timestamp;
89
90                 idle_time = iinfo->idle_current - iinfo->idle_last;
91                 idle_time *= 100;
92                 do_div(idle_time, elapsed_time);
93                 *load = 100 - idle_time;
94         }
95         mod_timer(&load_timer, jiffies + msecs_to_jiffies(load_sample_rate));
96 }
97
98 static void start_load_timer(void)
99 {
100         int i;
101
102         if (load_timer_active)
103                 return;
104
105         load_timer_active = true;
106
107         for_each_online_cpu(i) {
108                 struct idle_info *iinfo = &per_cpu(idleinfo, i);
109
110                 iinfo->idle_current =
111                         get_cpu_idle_time_us(i, &iinfo->timestamp);
112         }
113         mod_timer(&load_timer, jiffies + msecs_to_jiffies(100));
114 }
115
116 static void stop_load_timer(void)
117 {
118         if (!load_timer_active)
119                 return;
120
121         load_timer_active = false;
122         del_timer(&load_timer);
123 }
124
125 static unsigned int get_slowest_cpu_n(void)
126 {
127         unsigned int cpu = nr_cpu_ids;
128         unsigned long minload = ULONG_MAX;
129         int i;
130
131         for_each_online_cpu(i) {
132                 unsigned int *load = &per_cpu(cpu_load, i);
133
134                 if ((i > 0) && (minload > *load)) {
135                         cpu = i;
136                         minload = *load;
137                 }
138         }
139
140         return cpu;
141 }
142
143 static unsigned int cpu_highest_speed(void)
144 {
145         unsigned int maxload = 0;
146         int i;
147
148         for_each_online_cpu(i) {
149                 unsigned int *load = &per_cpu(cpu_load, i);
150
151                 maxload = max(maxload, *load);
152         }
153
154         return maxload;
155 }
156
157 static unsigned int count_slow_cpus(unsigned int limit)
158 {
159         unsigned int cnt = 0;
160         int i;
161
162         for_each_online_cpu(i) {
163                 unsigned int *load = &per_cpu(cpu_load, i);
164
165                 if (*load <= limit)
166                         cnt++;
167         }
168
169         return cnt;
170 }
171
/*
 * Thresholds on the average number of runnable threads, stored in fixed
 * point with NR_FSHIFT fractional bits (i.e. scaled by 4). Entry N-1 is
 * the limit up to which N online CPUs are considered sufficient.
 */
#define NR_FSHIFT       2
static unsigned int nr_run_thresholds[] = {
/*      1,  2,  3,  4 - on-line cpus target */
        5,  9, 10, UINT_MAX /* avg run threads * 4 (e.g., 9 = 2.25 threads) */
};
/* extra margin added to a threshold when the last target was not above it */
static unsigned int nr_run_hysteresis = 2;      /* 0.5 thread */
/* CPU-count target chosen by the previous balanced_speed_balance() call */
static unsigned int nr_run_last;

179
180 static CPU_SPEED_BALANCE balanced_speed_balance(void)
181 {
182         unsigned long highest_speed = cpu_highest_speed();
183         unsigned long balanced_speed = highest_speed * balance_level / 100;
184         unsigned long skewed_speed = balanced_speed / 2;
185         unsigned int nr_cpus = num_online_cpus();
186         unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
187         unsigned int avg_nr_run = avg_nr_running();
188         unsigned int nr_run;
189
190         /* balanced: freq targets for all CPUs are above 50% of highest speed
191            biased: freq target for at least one CPU is below 50% threshold
192            skewed: freq targets for at least 2 CPUs are below 25% threshold */
193         for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) {
194                 unsigned int nr_threshold = nr_run_thresholds[nr_run - 1];
195                 if (nr_run_last <= nr_run)
196                         nr_threshold += nr_run_hysteresis;
197                 if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT)))
198                         break;
199         }
200         nr_run_last = nr_run;
201
202         if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus ||
203                 nr_run < nr_cpus)
204                 return CPU_SPEED_SKEWED;
205
206         if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus ||
207                 nr_run <= nr_cpus)
208                 return CPU_SPEED_BIASED;
209
210         return CPU_SPEED_BALANCED;
211 }
212
213 static void balanced_work_func(struct work_struct *work)
214 {
215         bool up = false;
216         unsigned int cpu = nr_cpu_ids;
217         unsigned long now = jiffies;
218
219         CPU_SPEED_BALANCE balance;
220
221         switch (balanced_state) {
222         case IDLE:
223                 break;
224         case DOWN:
225                 cpu = get_slowest_cpu_n();
226                 if (cpu < nr_cpu_ids) {
227                         up = false;
228                         queue_delayed_work(balanced_wq,
229                                                  &balanced_work, up_delay);
230                 } else
231                         stop_load_timer();
232                 break;
233         case UP:
234                 balance = balanced_speed_balance();
235                 switch (balance) {
236
237                 /* cpu speed is up and balanced - one more on-line */
238                 case CPU_SPEED_BALANCED:
239                         cpu = cpumask_next_zero(0, cpu_online_mask);
240                         if (cpu < nr_cpu_ids)
241                                 up = true;
242                         break;
243                 /* cpu speed is up, but skewed - remove one core */
244                 case CPU_SPEED_SKEWED:
245                         cpu = get_slowest_cpu_n();
246                         if (cpu < nr_cpu_ids)
247                                 up = false;
248                         break;
249                 /* cpu speed is up, but under-utilized - do nothing */
250                 case CPU_SPEED_BIASED:
251                 default:
252                         break;
253                 }
254                 queue_delayed_work(
255                         balanced_wq, &balanced_work, up_delay);
256                 break;
257         default:
258                 pr_err("%s: invalid cpuquiet balanced governor state %d\n",
259                        __func__, balanced_state);
260         }
261
262         if (!up && ((now - last_change_time) < down_delay))
263                 cpu = nr_cpu_ids;
264
265         if (cpu < nr_cpu_ids) {
266                 last_change_time = now;
267                 if (up)
268                         cpuquiet_wake_cpu(cpu);
269                 else
270                         cpuquiet_quiesence_cpu(cpu);
271         }
272 }
273
274 static int balanced_cpufreq_transition(struct notifier_block *nb,
275         unsigned long state, void *data)
276 {
277         struct cpufreq_freqs *freqs = data;
278         unsigned long cpu_freq;
279
280         if (state == CPUFREQ_POSTCHANGE || state == CPUFREQ_RESUMECHANGE) {
281                 cpu_freq = freqs->new;
282
283                 switch (balanced_state) {
284                 case IDLE:
285                         if (cpu_freq > idle_top_freq) {
286                                 balanced_state = UP;
287                                 queue_delayed_work(
288                                         balanced_wq, &balanced_work, up_delay);
289                                 start_load_timer();
290                         } else if (cpu_freq <= idle_bottom_freq) {
291                                 balanced_state = DOWN;
292                                 queue_delayed_work(
293                                         balanced_wq, &balanced_work,
294                                         down_delay);
295                                 start_load_timer();
296                         }
297                         break;
298                 case DOWN:
299                         if (cpu_freq > idle_top_freq) {
300                                 balanced_state = UP;
301                                 queue_delayed_work(
302                                         balanced_wq, &balanced_work, up_delay);
303                                 start_load_timer();
304                         }
305                         break;
306                 case UP:
307                         if (cpu_freq <= idle_bottom_freq) {
308                                 balanced_state = DOWN;
309                                 queue_delayed_work(balanced_wq,
310                                         &balanced_work, up_delay);
311                                 start_load_timer();
312                         }
313                         break;
314                 default:
315                         pr_err("%s: invalid cpuquiet balanced governor "
316                                 "state %d\n", __func__, balanced_state);
317                 }
318         }
319
320         return NOTIFY_OK;
321 }
322
/* Registered for CPUFREQ_TRANSITION_NOTIFIER in balanced_start(). */
static struct notifier_block balanced_cpufreq_nb = {
        .notifier_call = balanced_cpufreq_transition,
};
326
327 static void delay_callback(struct cpuquiet_attribute *attr)
328 {
329         unsigned long val;
330
331         if (attr) {
332                 val = (*((unsigned long *)(attr->param)));
333                 (*((unsigned long *)(attr->param))) = msecs_to_jiffies(val);
334         }
335 }
336
/*
 * Attribute declarations for the tunables above. The delay attributes
 * pass delay_callback, which converts the stored value from msec to
 * jiffies.
 * NOTE(review): the CPQ_* macros are defined outside this file
 * (presumably in linux/cpuquiet.h); each is assumed to define
 * <name>_attr - confirm.
 */
CPQ_BASIC_ATTRIBUTE(balance_level, 0644, uint);
CPQ_BASIC_ATTRIBUTE(idle_bottom_freq, 0644, uint);
CPQ_BASIC_ATTRIBUTE(idle_top_freq, 0644, uint);
CPQ_BASIC_ATTRIBUTE(load_sample_rate, 0644, uint);
CPQ_ATTRIBUTE(up_delay, 0644, ulong, delay_callback);
CPQ_ATTRIBUTE(down_delay, 0644, ulong, delay_callback);
343
344 static struct attribute *balanced_attributes[] = {
345         &balance_level_attr.attr,
346         &idle_bottom_freq_attr.attr,
347         &idle_top_freq_attr.attr,
348         &up_delay_attr.attr,
349         &down_delay_attr.attr,
350         NULL,
351 };
352
/* Route sysfs reads/writes to the generic cpuquiet attribute helpers. */
static const struct sysfs_ops balanced_sysfs_ops = {
        .show = cpuquiet_auto_sysfs_show,
        .store = cpuquiet_auto_sysfs_store,
};
357
/*
 * kobject type of the governor's sysfs directory.
 * NOTE(review): no .release is set although balanced_kobject is
 * kzalloc()ed and dropped with kobject_put(); the memory is presumably
 * never freed on the final put - confirm and add a release handler.
 */
static struct kobj_type ktype_balanced = {
        .sysfs_ops = &balanced_sysfs_ops,
        .default_attrs = balanced_attributes,
};
362
363 static int balanced_sysfs(void)
364 {
365         int err;
366
367         balanced_kobject = kzalloc(sizeof(*balanced_kobject),
368                                 GFP_KERNEL);
369
370         if (!balanced_kobject)
371                 return -ENOMEM;
372
373         err = cpuquiet_kobject_init(balanced_kobject, &ktype_balanced,
374                                 "balanced");
375
376         if (err)
377                 kfree(balanced_kobject);
378
379         return err;
380 }
381
382 static void balanced_stop(void)
383 {
384         /*
385            first unregister the notifiers. This ensures the governor state
386            can't be modified by a cpufreq transition
387         */
388         cpufreq_unregister_notifier(&balanced_cpufreq_nb,
389                 CPUFREQ_TRANSITION_NOTIFIER);
390
391         /* now we can force the governor to be idle */
392         balanced_state = IDLE;
393         cancel_delayed_work_sync(&balanced_work);
394         destroy_workqueue(balanced_wq);
395         del_timer(&load_timer);
396
397         kobject_put(balanced_kobject);
398 }
399
400 static int balanced_start(void)
401 {
402         int err, count;
403         struct cpufreq_frequency_table *table;
404         struct cpufreq_freqs initial_freq;
405
406         err = balanced_sysfs();
407         if (err)
408                 return err;
409
410         balanced_wq = alloc_workqueue("cpuquiet-balanced",
411                         WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1);
412         if (!balanced_wq)
413                 return -ENOMEM;
414
415         INIT_DELAYED_WORK(&balanced_work, balanced_work_func);
416
417         up_delay = msecs_to_jiffies(100);
418         down_delay = msecs_to_jiffies(500);
419
420         table = cpufreq_frequency_get_table(0);
421         for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++);
422
423         idle_top_freq = table[(count / 2) - 1].frequency;
424         idle_bottom_freq = table[(count / 2) - 2].frequency;
425
426         cpufreq_register_notifier(&balanced_cpufreq_nb,
427                 CPUFREQ_TRANSITION_NOTIFIER);
428
429         init_timer(&load_timer);
430         load_timer.function = calculate_load_timer;
431
432         /*FIXME: Kick start the state machine by faking a freq notification*/
433         initial_freq.new = cpufreq_get(0);
434         if (initial_freq.new != 0)
435                 balanced_cpufreq_transition(NULL, CPUFREQ_RESUMECHANGE,
436                                                 &initial_freq);
437         return 0;
438 }
439
/* Governor descriptor handed to the cpuquiet core at module init. */
struct cpuquiet_governor balanced_governor = {
        .name           = "balanced",
        .start          = balanced_start,
        .stop           = balanced_stop,
        .owner          = THIS_MODULE,
};
446
447 static int __init init_balanced(void)
448 {
449         return cpuquiet_register_governor(&balanced_governor);
450 }
451
/* Module exit point: unregister the governor from the cpuquiet core. */
static void __exit exit_balanced(void)
{
        cpuquiet_unregister_governor(&balanced_governor);
}
456
/* Module metadata and init/exit hook registration. */
MODULE_LICENSE("GPL");
module_init(init_balanced);
module_exit(exit_balanced);
460