/*
 * drivers/misc/cpuload.c
 *
 * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
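/*
 * cpuload: a simple per-cpu load monitor.  A periodic timer on each CPU
 * samples idle and iowait time to compute a load percentage and the
 * time-averaged number of runnable threads, and the results are exported
 * through sysfs.  An idle notifier cancels and re-arms the sampling timer
 * around idle so the monitor does not needlessly wake idle CPUs.
 */
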
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include <asm/cputime.h>

static atomic_t active_count = ATOMIC_INIT(0);
static unsigned int enabled;

static void cpuloadmon_enable(unsigned int state);

struct cpuloadmon_cpuinfo {
        /* cpu load */
        struct timer_list cpu_timer;
        int timer_idlecancel;
        u64 time_in_idle;
        u64 time_in_iowait;
        u64 idle_exit_time;
        u64 timer_run_time;
        int idling;
        int monitor_enabled;
        int cpu_load;

        /* runnable threads */
        u64 previous_integral;
        unsigned int avg;
        bool integral_sampled;
        u64 prev_timestamp;
};

static DEFINE_PER_CPU(struct cpuloadmon_cpuinfo, cpuinfo);

/* Consider IO as busy */
static unsigned long io_is_busy;

/*
 * Sampling period of the load monitoring timer, in microseconds.
 */
#define DEFAULT_TIMER_RATE 20000
static unsigned long timer_rate;

/* nr runnable threads */
#define NR_FSHIFT_EXP   3
#define NR_FSHIFT       (1 << NR_FSHIFT_EXP)
#define EXP    1497 /* 20 msec window */
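/*
 * NR_FSHIFT is the fixed-point scale of the runnable-threads figures and
 * EXP the decay constant of the ~20 ms averaging window; both are assumed
 * to match the scheduler-side accounting behind nr_running_integral().
 */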

static inline cputime64_t get_cpu_iowait_time(
        unsigned int cpu, cputime64_t *wall)
{
        u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

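        /*
         * get_cpu_iowait_time_us() returns -1ULL when NO_HZ idle time
         * accounting is unavailable for this cpu; report zero iowait
         * time in that case.
         */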
        if (iowait_time == -1ULL)
                return 0;

        return iowait_time;
}

static void cpuloadmon_timer(unsigned long data)
{
        unsigned int delta_idle;
        unsigned int delta_iowait;
        unsigned int delta_time;
        u64 time_in_idle;
        u64 time_in_iowait;
        u64 idle_exit_time;
        struct cpuloadmon_cpuinfo *pcpu =
                &per_cpu(cpuinfo, data);
        u64 now_idle;
        u64 now_iowait;
        u64 integral, old_integral, delta_integral, delta_time_nr, cur_time;

        smp_rmb();

        if (!pcpu->monitor_enabled)
                goto exit;

        /*
         * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
         * this lets idle exit know the current idle time sample has
         * been processed, and idle exit can generate a new sample and
         * re-arm the timer.  This prevents a concurrent idle
         * exit on that CPU from writing a new set of info at the same time
         * the timer function runs (the timer function can't use that info
         * until more time passes).
         */
        time_in_idle = pcpu->time_in_idle;
        time_in_iowait = pcpu->time_in_iowait;
        idle_exit_time = pcpu->idle_exit_time;
        now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
        now_iowait = get_cpu_iowait_time(data, NULL);
        smp_wmb();

        /* If we raced with cancelling a timer, skip. */
        if (!idle_exit_time)
                goto exit;

        delta_idle = (unsigned int)(now_idle - time_in_idle);
        delta_iowait = (unsigned int)(now_iowait - time_in_iowait);
        delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time);

        /*
         * If timer ran less than 1ms after short-term sample started, retry.
         */
        if (delta_time < 1000)
                goto rearm;

        if (!io_is_busy)
                delta_idle += delta_iowait;

        if (delta_idle > delta_time)
                pcpu->cpu_load = 0;
        else
                pcpu->cpu_load = 100 * (delta_time - delta_idle) / delta_time;

        /* get avg nr runnables */
        integral = nr_running_integral(data);
        old_integral = pcpu->previous_integral;
        pcpu->previous_integral = integral;
        cur_time = ktime_to_ns(ktime_get());
        delta_time_nr = cur_time - pcpu->prev_timestamp;
        pcpu->prev_timestamp = cur_time;

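        /*
         * Dividing the growth of the runnable-threads integral by the
         * elapsed wall time gives the time-averaged runnable-thread count
         * for the window, in the units returned by nr_running_integral().
         */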
        if (!pcpu->integral_sampled) {
                pcpu->integral_sampled = true;
                /*
                 * The first sample only initializes prev_integral; skip
                 * the avg calculation.
                 */
        } else {
                if (integral < old_integral) {
                        /* Overflow */
                        delta_integral = (ULLONG_MAX - old_integral) + integral;
                } else {
                        delta_integral = integral - old_integral;
                }

                /* Calculate average for the previous sample window */
                do_div(delta_integral, delta_time_nr);
                pcpu->avg = delta_integral;
        }

rearm:
        if (!timer_pending(&pcpu->cpu_timer)) {
                if (pcpu->idling)
                        goto exit;

                pcpu->time_in_idle = get_cpu_idle_time_us(
                        data, &pcpu->idle_exit_time);
                pcpu->time_in_iowait = get_cpu_iowait_time(
                        data, NULL);

                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }

exit:
        return;
}

static void cpuloadmon_idle_start(void)
{
        struct cpuloadmon_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());
        int pending;

        if (!pcpu->monitor_enabled)
                return;

        pcpu->idling = 1;
        smp_wmb();
        pending = timer_pending(&pcpu->cpu_timer);

        if (pending && pcpu->timer_idlecancel) {
                del_timer(&pcpu->cpu_timer);
                /*
                 * Ensure last timer run time is after current idle
                 * sample start time, so next idle exit will always
                 * start a new idle sampling period.
                 */
                pcpu->idle_exit_time = 0;
                pcpu->timer_idlecancel = 0;
        }
}

static void cpuloadmon_idle_end(void)
{
        struct cpuloadmon_cpuinfo *pcpu =
                &per_cpu(cpuinfo, smp_processor_id());

        if (!pcpu->monitor_enabled)
                return;

        pcpu->idling = 0;
        smp_wmb();

        /*
         * Arm the timer for 1-2 ticks later if not already, and if the timer
         * function has already processed the previous load sampling
         * interval.  (If the timer is not pending but has not processed
         * the previous interval, it is probably racing with us on another
         * CPU.  Let it compute load based on the previous sample and then
         * re-arm the timer for another interval when it's done, rather
         * than updating the interval start time to be "now", which doesn't
         * give the timer function enough time to make a decision on this
         * run.)
         */
        if (timer_pending(&pcpu->cpu_timer) == 0 &&
            pcpu->timer_run_time >= pcpu->idle_exit_time &&
            pcpu->monitor_enabled) {
                pcpu->time_in_idle =
                        get_cpu_idle_time_us(smp_processor_id(),
                                             &pcpu->idle_exit_time);
                pcpu->time_in_iowait =
                        get_cpu_iowait_time(smp_processor_id(),
                                            NULL);
                pcpu->timer_idlecancel = 0;
                mod_timer(&pcpu->cpu_timer,
                          jiffies + usecs_to_jiffies(timer_rate));
        }
}

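/*
 * Generate sysfs show/store handlers and a global_attr for a plain
 * unsigned long tunable.
 */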
#define DECL_CPULOAD_ATTR(name) \
static ssize_t show_##name(struct kobject *kobj, \
        struct attribute *attr, char *buf) \
{ \
        return sprintf(buf, "%lu\n", name); \
} \
\
static ssize_t store_##name(struct kobject *kobj, \
                struct attribute *attr, const char *buf, size_t count) \
{ \
        int ret; \
        unsigned long val; \
\
        ret = kstrtoul(buf, 0, &val); \
        if (ret < 0) \
                return ret; \
        name = val; \
        return count; \
} \
\
static struct global_attr name##_attr = __ATTR(name, 0644, \
                show_##name, store_##name);

static ssize_t show_cpus_online(struct kobject *kobj,
                struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", cpumask_weight(cpu_online_mask));
}

static struct global_attr cpus_online_attr = __ATTR(cpus_online, 0444,
                show_cpus_online, NULL);

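/*
 * cpu_load prints one line per online cpu:
 * "<cpu> <load percent> <avg runnable threads>"
 */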
static ssize_t show_cpu_load(struct kobject *kobj,
                struct attribute *attr, char *buf)
{
        unsigned int t, len, total;
        const cpumask_t *cpus = cpu_online_mask;
        struct cpuloadmon_cpuinfo *pcpu;

        total = 0;

        for_each_cpu_mask(t, *cpus) {
                pcpu = &per_cpu(cpuinfo, t);
                len = sprintf(buf, "%u %u %u\n",
                        t, pcpu->cpu_load, pcpu->avg);
                total += len;
                buf = &buf[len];
        }

        return total;
}

static struct global_attr cpu_load_attr = __ATTR(cpu_load, 0444,
                show_cpu_load, NULL);

static ssize_t show_enable(struct kobject *kobj,
                struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", enabled);
}

static ssize_t store_enable(struct kobject *kobj,
                struct attribute *attr, const char *buf, size_t count)
{
        int ret;
        unsigned long val;
        unsigned int before = enabled;

        ret = kstrtoul(buf, 0, &val);
        if (ret < 0)
                return ret;
        enabled = val;
        if (before != enabled)
                cpuloadmon_enable(enabled);

        return count;
}

static struct global_attr enable_attr = __ATTR(enable, 0644,
                show_enable, store_enable);

DECL_CPULOAD_ATTR(io_is_busy)
DECL_CPULOAD_ATTR(timer_rate)
#undef DECL_CPULOAD_ATTR

static struct attribute *cpuload_attributes[] = {
        &io_is_busy_attr.attr,
        &timer_rate_attr.attr,
        &cpus_online_attr.attr,
        &cpu_load_attr.attr,
        &enable_attr.attr,
        NULL,
};

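/*
 * The group is created on cpufreq_global_kobject, so these files appear
 * under /sys/devices/system/cpu/cpufreq/cpuload/ (for example,
 * "echo 1 > enable" starts sampling and "cat cpu_load" reads results).
 */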
static struct attribute_group cpuload_attr_group = {
        .attrs = cpuload_attributes,
        .name = "cpuload",
};

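/*
 * Idle hooks: stop the sampling timer when a cpu enters idle and restart
 * it on idle exit, taking care not to lose or double-process a sample
 * that is racing with the timer function.
 */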
static int cpuloadmon_idle_notifier(struct notifier_block *nb,
                                             unsigned long val,
                                             void *data)
{
        switch (val) {
        case IDLE_START:
                cpuloadmon_idle_start();
                break;
        case IDLE_END:
                cpuloadmon_idle_end();
                break;
        }

        return 0;
}

static struct notifier_block cpuloadmon_idle_nb = {
        .notifier_call = cpuloadmon_idle_notifier,
};

static void cpuloadmon_enable(unsigned int state)
{
        unsigned int j;
        struct cpuloadmon_cpuinfo *pcpu;
        const cpumask_t *cpus = cpu_possible_mask;

        if (state) {
                u64 last_update;

                for_each_cpu(j, cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->time_in_idle =
                                get_cpu_idle_time_us(j, &last_update);
                        pcpu->idle_exit_time = last_update;
                        pcpu->time_in_iowait =
                                get_cpu_iowait_time(j, NULL);
                        pcpu->timer_idlecancel = 1;
                        pcpu->monitor_enabled = 1;
                        smp_wmb();

                        if (!timer_pending(&pcpu->cpu_timer))
                                mod_timer(&pcpu->cpu_timer, jiffies + 2);
                }
        } else {
                for_each_cpu(j, cpus) {
                        pcpu = &per_cpu(cpuinfo, j);
                        pcpu->monitor_enabled = 0;
                        smp_wmb();
                        del_timer_sync(&pcpu->cpu_timer);

                        /*
                         * Reset idle exit time since we may cancel the timer
                         * before it can run after the last idle exit time,
                         * to avoid tripping the check in idle exit for a timer
                         * that is trying to run.
                         */
                        pcpu->idle_exit_time = 0;
                }
        }

        enabled = state;
}

static int cpuloadmon_start(void)
{
        int rc;

        cpuloadmon_enable(1);

        /*
         * Do not register the idle hook and create sysfs
         * entries if we have already done so.
         */
        if (atomic_inc_return(&active_count) > 1)
                return 0;

        rc = sysfs_create_group(cpufreq_global_kobject,
                        &cpuload_attr_group);
        if (rc)
                return rc;

        idle_notifier_register(&cpuloadmon_idle_nb);

        return 0;
}

static int cpuloadmon_stop(void)
{
        cpuloadmon_enable(0);

        if (atomic_dec_return(&active_count) > 0)
                return 0;

        idle_notifier_unregister(&cpuloadmon_idle_nb);
        sysfs_remove_group(cpufreq_global_kobject,
                        &cpuload_attr_group);

        return 0;
}

static int __init cpuload_monitor_init(void)
{
        unsigned int i;
        struct cpuloadmon_cpuinfo *pcpu;

        timer_rate = DEFAULT_TIMER_RATE;

        /* Initialize per-cpu timers */
        for_each_possible_cpu(i) {
                pcpu = &per_cpu(cpuinfo, i);
                init_timer(&pcpu->cpu_timer);
                pcpu->cpu_timer.function = cpuloadmon_timer;
                pcpu->cpu_timer.data = i;
        }

        cpuloadmon_start();

        /* disabled by default */
        cpuloadmon_enable(0);

        return 0;
}

module_init(cpuload_monitor_init);

static void __exit cpuload_monitor_exit(void)
{
        cpuloadmon_stop();
}

module_exit(cpuload_monitor_exit);

MODULE_AUTHOR("Ilan Aelion <iaelion@nvidia.com>");
MODULE_DESCRIPTION("'cpuload_monitor' - A cpu load monitor");
MODULE_LICENSE("GPL");