]> nv-tegra.nvidia Code Review - linux-3.10.git/blob - drivers/cpufreq/cpufreq.c
Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-3.10.git] / drivers / cpufreq / cpufreq.c
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *      Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *      Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/notifier.h>
24 #include <linux/cpufreq.h>
25 #include <linux/delay.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/device.h>
29 #include <linux/slab.h>
30 #include <linux/cpu.h>
31 #include <linux/completion.h>
32 #include <linux/mutex.h>
33 #include <linux/syscore_ops.h>
34
35 #include <trace/events/power.h>
36
37 /**
38  * The "cpufreq driver" - the arch- or hardware-dependent low
39  * level driver of CPUFreq support, and its spinlock. This lock
40  * also protects the cpufreq_cpu_data array.
41  */
42 static struct cpufreq_driver *cpufreq_driver;
43 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
44 #ifdef CONFIG_HOTPLUG_CPU
45 /* This one keeps track of the previously set governor of a removed CPU */
46 static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);
47 #endif
48 static DEFINE_SPINLOCK(cpufreq_driver_lock);
49
50 /*
51  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
52  * all cpufreq/hotplug/workqueue/etc related lock issues.
53  *
54  * The rules for this semaphore:
55  * - Any routine that wants to read from the policy structure will
56  *   do a down_read on this semaphore.
57  * - Any routine that will write to the policy structure and/or may take away
58  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
59  *   mode before doing so.
60  *
61  * Additional rules:
62  * - All holders of the lock should check to make sure that the CPU they
63  *   are concerned with is online after they get the lock.
64  * - Governor routines that can be called in cpufreq hotplug path should not
65  *   take this sem as top level hotplug notifier handler takes this.
66  * - Lock should not be held across
67  *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
68  */
/*
 * Per-CPU: number of the CPU whose rwsem covers this CPU's policy.
 * The lock/unlock helpers below BUG() when they find -1 here.
 */
69 static DEFINE_PER_CPU(int, cpufreq_policy_cpu);
/* Per-CPU reader-writer semaphore; always looked up via cpufreq_policy_cpu. */
70 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
71
72 #define lock_policy_rwsem(mode, cpu)                                    \
73 static int lock_policy_rwsem_##mode                                     \
74 (int cpu)                                                               \
75 {                                                                       \
76         int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);              \
77         BUG_ON(policy_cpu == -1);                                       \
78         down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
79         if (unlikely(!cpu_online(cpu))) {                               \
80                 up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
81                 return -1;                                              \
82         }                                                               \
83                                                                         \
84         return 0;                                                       \
85 }
86
87 lock_policy_rwsem(read, cpu);
88
89 lock_policy_rwsem(write, cpu);
90
91 static void unlock_policy_rwsem_read(int cpu)
92 {
93         int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
94         BUG_ON(policy_cpu == -1);
95         up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
96 }
97
98 static void unlock_policy_rwsem_write(int cpu)
99 {
100         int policy_cpu = per_cpu(cpufreq_policy_cpu, cpu);
101         BUG_ON(policy_cpu == -1);
102         up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
103 }
104
105
106 /* internal prototypes */
107 static int __cpufreq_governor(struct cpufreq_policy *policy,
108                 unsigned int event);
109 static unsigned int __cpufreq_get(unsigned int cpu);
110 static void handle_update(struct work_struct *work);
111
112 /**
113  * Two notifier lists: the "policy" list is involved in the
114  * validation process for a new CPU frequency policy; the
115  * "transition" list for kernel code that needs to handle
116  * changes to devices when the CPU clock speed changes.
117  * The mutex locks both lists.
118  */
119 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
120 static struct srcu_notifier_head cpufreq_transition_notifier_list;
121
122 static bool init_cpufreq_transition_notifier_list_called;
123 static int __init init_cpufreq_transition_notifier_list(void)
124 {
125         srcu_init_notifier_head(&cpufreq_transition_notifier_list);
126         init_cpufreq_transition_notifier_list_called = true;
127         return 0;
128 }
129 pure_initcall(init_cpufreq_transition_notifier_list);
130
131 static int off __read_mostly;
132 static int cpufreq_disabled(void)
133 {
134         return off;
135 }
136 void disable_cpufreq(void)
137 {
138         off = 1;
139 }
140 static LIST_HEAD(cpufreq_governor_list);
141 static DEFINE_MUTEX(cpufreq_governor_mutex);
142
143 static struct cpufreq_policy *__cpufreq_cpu_get(unsigned int cpu, bool sysfs)
144 {
145         struct cpufreq_policy *data;
146         unsigned long flags;
147
148         if (cpu >= nr_cpu_ids)
149                 goto err_out;
150
151         /* get the cpufreq driver */
152         spin_lock_irqsave(&cpufreq_driver_lock, flags);
153
154         if (!cpufreq_driver)
155                 goto err_out_unlock;
156
157         if (!try_module_get(cpufreq_driver->owner))
158                 goto err_out_unlock;
159
160
161         /* get the CPU */
162         data = per_cpu(cpufreq_cpu_data, cpu);
163
164         if (!data)
165                 goto err_out_put_module;
166
167         if (!sysfs && !kobject_get(&data->kobj))
168                 goto err_out_put_module;
169
170         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
171         return data;
172
173 err_out_put_module:
174         module_put(cpufreq_driver->owner);
175 err_out_unlock:
176         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
177 err_out:
178         return NULL;
179 }
180
181 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
182 {
183         return __cpufreq_cpu_get(cpu, false);
184 }
185 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
186
187 static struct cpufreq_policy *cpufreq_cpu_get_sysfs(unsigned int cpu)
188 {
189         return __cpufreq_cpu_get(cpu, true);
190 }
191
192 static void __cpufreq_cpu_put(struct cpufreq_policy *data, bool sysfs)
193 {
194         if (!sysfs)
195                 kobject_put(&data->kobj);
196         module_put(cpufreq_driver->owner);
197 }
198
199 void cpufreq_cpu_put(struct cpufreq_policy *data)
200 {
201         __cpufreq_cpu_put(data, false);
202 }
203 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
204
205 static void cpufreq_cpu_put_sysfs(struct cpufreq_policy *data)
206 {
207         __cpufreq_cpu_put(data, true);
208 }
209
210 /*********************************************************************
211  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
212  *********************************************************************/
213
214 /**
215  * adjust_jiffies - adjust the system "loops_per_jiffy"
216  *
217  * This function alters the system "loops_per_jiffy" for the clock
218  * speed change. Note that loops_per_jiffy cannot be updated on SMP
219  * systems as each CPU might be scaled differently. So, use the arch
220  * per-CPU loops_per_jiffy value wherever possible.
221  */
222 #ifndef CONFIG_SMP
223 static unsigned long l_p_j_ref;
224 static unsigned int  l_p_j_ref_freq;
225
226 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
227 {
228         if (ci->flags & CPUFREQ_CONST_LOOPS)
229                 return;
230
231         if (!l_p_j_ref_freq) {
232                 l_p_j_ref = loops_per_jiffy;
233                 l_p_j_ref_freq = ci->old;
234                 pr_debug("saving %lu as reference value for loops_per_jiffy; "
235                         "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
236         }
237         if ((val == CPUFREQ_POSTCHANGE  && ci->old != ci->new) ||
238             (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
239                 loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
240                                                                 ci->new);
241                 pr_debug("scaling loops_per_jiffy to %lu "
242                         "for frequency %u kHz\n", loops_per_jiffy, ci->new);
243         }
244 }
245 #else
246 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
247 {
248         return;
249 }
250 #endif
251
252
253 /**
254  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
255  * on frequency transition.
256  *
257  * This function calls the transition notifiers and the "adjust_jiffies"
258  * function. It is called twice on all CPU frequency changes that have
259  * external effects.
260  */
261 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
262 {
263         struct cpufreq_policy *policy;
264
265         BUG_ON(irqs_disabled());
266
267         freqs->flags = cpufreq_driver->flags;
268         pr_debug("notification %u of frequency transition to %u kHz\n",
269                 state, freqs->new);
270
271         policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
272         switch (state) {
273
274         case CPUFREQ_PRECHANGE:
275                 /* detect if the driver reported a value as "old frequency"
276                  * which is not equal to what the cpufreq core thinks is
277                  * "old frequency".
278                  */
279                 if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
280                         if ((policy) && (policy->cpu == freqs->cpu) &&
281                             (policy->cur) && (policy->cur != freqs->old)) {
282                                 pr_debug("Warning: CPU frequency is"
283                                         " %u, cpufreq assumed %u kHz.\n",
284                                         freqs->old, policy->cur);
285                                 freqs->old = policy->cur;
286                         }
287                 }
288                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
289                                 CPUFREQ_PRECHANGE, freqs);
290                 adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
291                 break;
292
293         case CPUFREQ_POSTCHANGE:
294                 adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
295                 pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
296                         (unsigned long)freqs->cpu);
297                 trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
298                 trace_cpu_frequency(freqs->new, freqs->cpu);
299                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
300                                 CPUFREQ_POSTCHANGE, freqs);
301                 if (likely(policy) && likely(policy->cpu == freqs->cpu))
302                         policy->cur = freqs->new;
303                 break;
304         }
305 }
306 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
307
308
309
310 /*********************************************************************
311  *                          SYSFS INTERFACE                          *
312  *********************************************************************/
313
314 static struct cpufreq_governor *__find_governor(const char *str_governor)
315 {
316         struct cpufreq_governor *t;
317
318         list_for_each_entry(t, &cpufreq_governor_list, governor_list)
319                 if (!strnicmp(str_governor, t->name, CPUFREQ_NAME_LEN))
320                         return t;
321
322         return NULL;
323 }
324
325 /**
326  * cpufreq_parse_governor - parse a governor string
327  */
328 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
329                                 struct cpufreq_governor **governor)
330 {
331         int err = -EINVAL;
332
333         if (!cpufreq_driver)
334                 goto out;
335
336         if (cpufreq_driver->setpolicy) {
337                 if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
338                         *policy = CPUFREQ_POLICY_PERFORMANCE;
339                         err = 0;
340                 } else if (!strnicmp(str_governor, "powersave",
341                                                 CPUFREQ_NAME_LEN)) {
342                         *policy = CPUFREQ_POLICY_POWERSAVE;
343                         err = 0;
344                 }
345         } else if (cpufreq_driver->target) {
346                 struct cpufreq_governor *t;
347
348                 mutex_lock(&cpufreq_governor_mutex);
349
350                 t = __find_governor(str_governor);
351
352                 if (t == NULL) {
353                         int ret;
354
355                         mutex_unlock(&cpufreq_governor_mutex);
356                         ret = request_module("cpufreq_%s", str_governor);
357                         mutex_lock(&cpufreq_governor_mutex);
358
359                         if (ret == 0)
360                                 t = __find_governor(str_governor);
361                 }
362
363                 if (t != NULL) {
364                         *governor = t;
365                         err = 0;
366                 }
367
368                 mutex_unlock(&cpufreq_governor_mutex);
369         }
370 out:
371         return err;
372 }
373
374
375 /**
376  * cpufreq_per_cpu_attr_read() / show_##file_name() -
377  * print out cpufreq information
378  *
379  * Write out information from cpufreq_driver->policy[cpu]; object must be
380  * "unsigned int".
381  */
382
383 #define show_one(file_name, object)                     \
384 static ssize_t show_##file_name                         \
385 (struct cpufreq_policy *policy, char *buf)              \
386 {                                                       \
387         return sprintf(buf, "%u\n", policy->object);    \
388 }
389
390 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
391 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
392 show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
393 show_one(scaling_min_freq, min);
394 show_one(scaling_max_freq, max);
395 show_one(scaling_cur_freq, cur);
396
397 static int __cpufreq_set_policy(struct cpufreq_policy *data,
398                                 struct cpufreq_policy *policy);
399
400 /**
401  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
402  */
403 #define store_one(file_name, object)                    \
404 static ssize_t store_##file_name                                        \
405 (struct cpufreq_policy *policy, const char *buf, size_t count)          \
406 {                                                                       \
407         unsigned int ret;                                               \
408         struct cpufreq_policy new_policy;                               \
409                                                                         \
410         ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
411         if (ret)                                                        \
412                 return -EINVAL;                                         \
413                                                                         \
414         ret = sscanf(buf, "%u", &new_policy.object);                    \
415         if (ret != 1)                                                   \
416                 return -EINVAL;                                         \
417                                                                         \
418         ret = __cpufreq_set_policy(policy, &new_policy);                \
419         policy->user_policy.object = policy->object;                    \
420                                                                         \
421         return ret ? ret : count;                                       \
422 }
423
424 store_one(scaling_min_freq, min);
425 store_one(scaling_max_freq, max);
426
427 /**
428  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
429  */
430 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
431                                         char *buf)
432 {
433         unsigned int cur_freq = __cpufreq_get(policy->cpu);
434         if (!cur_freq)
435                 return sprintf(buf, "<unknown>");
436         return sprintf(buf, "%u\n", cur_freq);
437 }
438
439
440 /**
441  * show_scaling_governor - show the current policy for the specified CPU
442  */
443 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
444 {
445         if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
446                 return sprintf(buf, "powersave\n");
447         else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
448                 return sprintf(buf, "performance\n");
449         else if (policy->governor)
450                 return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n",
451                                 policy->governor->name);
452         return -EINVAL;
453 }
454
455
456 /**
457  * store_scaling_governor - store policy for the specified CPU
458  */
459 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
460                                         const char *buf, size_t count)
461 {
462         unsigned int ret;
463         char    str_governor[16];
464         struct cpufreq_policy new_policy;
465
466         ret = cpufreq_get_policy(&new_policy, policy->cpu);
467         if (ret)
468                 return ret;
469
470         ret = sscanf(buf, "%15s", str_governor);
471         if (ret != 1)
472                 return -EINVAL;
473
474         if (cpufreq_parse_governor(str_governor, &new_policy.policy,
475                                                 &new_policy.governor))
476                 return -EINVAL;
477
478         /* Do not use cpufreq_set_policy here or the user_policy.max
479            will be wrongly overridden */
480         ret = __cpufreq_set_policy(policy, &new_policy);
481
482         policy->user_policy.policy = policy->policy;
483         policy->user_policy.governor = policy->governor;
484
485         if (ret)
486                 return ret;
487         else
488                 return count;
489 }
490
491 /**
492  * show_scaling_driver - show the cpufreq driver currently loaded
493  */
494 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
495 {
496         return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name);
497 }
498
499 /**
500  * show_scaling_available_governors - show the available CPUfreq governors
501  */
502 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
503                                                 char *buf)
504 {
505         ssize_t i = 0;
506         struct cpufreq_governor *t;
507
508         if (!cpufreq_driver->target) {
509                 i += sprintf(buf, "performance powersave");
510                 goto out;
511         }
512
513         list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
514                 if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
515                     - (CPUFREQ_NAME_LEN + 2)))
516                         goto out;
517                 i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name);
518         }
519 out:
520         i += sprintf(&buf[i], "\n");
521         return i;
522 }
523
524 static ssize_t show_cpus(const struct cpumask *mask, char *buf)
525 {
526         ssize_t i = 0;
527         unsigned int cpu;
528
529         for_each_cpu(cpu, mask) {
530                 if (i)
531                         i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
532                 i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
533                 if (i >= (PAGE_SIZE - 5))
534                         break;
535         }
536         i += sprintf(&buf[i], "\n");
537         return i;
538 }
539
540 /**
541  * show_related_cpus - show the CPUs affected by each transition even if
542  * hw coordination is in use
543  */
544 static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
545 {
546         if (cpumask_empty(policy->related_cpus))
547                 return show_cpus(policy->cpus, buf);
548         return show_cpus(policy->related_cpus, buf);
549 }
550
551 /**
552  * show_affected_cpus - show the CPUs affected by each transition
553  */
554 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
555 {
556         return show_cpus(policy->cpus, buf);
557 }
558
559 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
560                                         const char *buf, size_t count)
561 {
562         unsigned int freq = 0;
563         unsigned int ret;
564
565         if (!policy->governor || !policy->governor->store_setspeed)
566                 return -EINVAL;
567
568         ret = sscanf(buf, "%u", &freq);
569         if (ret != 1)
570                 return -EINVAL;
571
572         policy->governor->store_setspeed(policy, freq);
573
574         return count;
575 }
576
577 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
578 {
579         if (!policy->governor || !policy->governor->show_setspeed)
580                 return sprintf(buf, "<unsupported>\n");
581
582         return policy->governor->show_setspeed(policy, buf);
583 }
584
585 /**
586  * show_bios_limit - show the current cpufreq HW/BIOS limitation
587  */
588 static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
589 {
590         unsigned int limit;
591         int ret;
592         if (cpufreq_driver->bios_limit) {
593                 ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
594                 if (!ret)
595                         return sprintf(buf, "%u\n", limit);
596         }
597         return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
598 }
599
/*
 * Attribute objects for the show_xxx()/store_xxx() handlers above.
 * cpuinfo_cur_freq is declared with mode 0400.
 */
600 cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
601 cpufreq_freq_attr_ro(cpuinfo_min_freq);
602 cpufreq_freq_attr_ro(cpuinfo_max_freq);
603 cpufreq_freq_attr_ro(cpuinfo_transition_latency);
604 cpufreq_freq_attr_ro(scaling_available_governors);
605 cpufreq_freq_attr_ro(scaling_driver);
606 cpufreq_freq_attr_ro(scaling_cur_freq);
607 cpufreq_freq_attr_ro(bios_limit);
608 cpufreq_freq_attr_ro(related_cpus);
609 cpufreq_freq_attr_ro(affected_cpus);
610 cpufreq_freq_attr_rw(scaling_min_freq);
611 cpufreq_freq_attr_rw(scaling_max_freq);
612 cpufreq_freq_attr_rw(scaling_governor);
613 cpufreq_freq_attr_rw(scaling_setspeed);
614
/*
 * Attributes referenced by ktype_cpufreq.default_attrs. cpuinfo_cur_freq,
 * scaling_cur_freq and bios_limit are deliberately absent:
 * cpufreq_add_dev_interface() creates them only when the driver provides
 * the matching ->get, ->target or ->bios_limit callback.
 */
615 static struct attribute *default_attrs[] = {
616         &cpuinfo_min_freq.attr,
617         &cpuinfo_max_freq.attr,
618         &cpuinfo_transition_latency.attr,
619         &scaling_min_freq.attr,
620         &scaling_max_freq.attr,
621         &affected_cpus.attr,
622         &related_cpus.attr,
623         &scaling_governor.attr,
624         &scaling_driver.attr,
625         &scaling_available_governors.attr,
626         &scaling_setspeed.attr,
627         NULL
628 };
629
630 struct kobject *cpufreq_global_kobject;
631 EXPORT_SYMBOL(cpufreq_global_kobject);
632
633 #define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
634 #define to_attr(a) container_of(a, struct freq_attr, attr)
635
636 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
637 {
638         struct cpufreq_policy *policy = to_policy(kobj);
639         struct freq_attr *fattr = to_attr(attr);
640         ssize_t ret = -EINVAL;
641         policy = cpufreq_cpu_get_sysfs(policy->cpu);
642         if (!policy)
643                 goto no_policy;
644
645         if (lock_policy_rwsem_read(policy->cpu) < 0)
646                 goto fail;
647
648         if (fattr->show)
649                 ret = fattr->show(policy, buf);
650         else
651                 ret = -EIO;
652
653         unlock_policy_rwsem_read(policy->cpu);
654 fail:
655         cpufreq_cpu_put_sysfs(policy);
656 no_policy:
657         return ret;
658 }
659
660 static ssize_t store(struct kobject *kobj, struct attribute *attr,
661                      const char *buf, size_t count)
662 {
663         struct cpufreq_policy *policy = to_policy(kobj);
664         struct freq_attr *fattr = to_attr(attr);
665         ssize_t ret = -EINVAL;
666         policy = cpufreq_cpu_get_sysfs(policy->cpu);
667         if (!policy)
668                 goto no_policy;
669
670         if (lock_policy_rwsem_write(policy->cpu) < 0)
671                 goto fail;
672
673         if (fattr->store)
674                 ret = fattr->store(policy, buf, count);
675         else
676                 ret = -EIO;
677
678         unlock_policy_rwsem_write(policy->cpu);
679 fail:
680         cpufreq_cpu_put_sysfs(policy);
681 no_policy:
682         return ret;
683 }
684
685 static void cpufreq_sysfs_release(struct kobject *kobj)
686 {
687         struct cpufreq_policy *policy = to_policy(kobj);
688         pr_debug("last reference is dropped\n");
689         complete(&policy->kobj_unregister);
690 }
691
/* Route every read/write of a policy attribute through show()/store(). */
692 static const struct sysfs_ops sysfs_ops = {
693         .show   = show,
694         .store  = store,
695 };
696
/*
 * kobject type of a cpufreq policy: default attribute set plus a release
 * hook that completes policy->kobj_unregister.
 */
697 static struct kobj_type ktype_cpufreq = {
698         .sysfs_ops      = &sysfs_ops,
699         .default_attrs  = default_attrs,
700         .release        = cpufreq_sysfs_release,
701 };
702
/*
 * cpufreq_add_dev_policy - attach @cpu to an already managed policy, if any.
 * @cpu:    CPU being added.
 * @policy: freshly initialised policy for @cpu.
 * @dev:    device of @cpu; receives the "cpufreq" symlink.
 *
 * With CONFIG_HOTPLUG_CPU the governor name saved for @cpu is looked up
 * and, when found, restored into @policy. Then every other CPU in
 * @policy->cpus is checked for an existing policy; the first one found
 * adopts @cpu and a sysfs link to its kobject is created.
 *
 * The policy rwsem of @cpu is dropped and re-taken (in write mode) while
 * cpufreq_policy_cpu is redirected to the managing CPU. On the -EBUSY
 * path the rwsem is not held on return.
 *
 * Returns:
 *   Negative: Failure
 *   0:        Success
 *   Positive: When we have a managed CPU and the sysfs got symlinked
 */
static int cpufreq_add_dev_policy(unsigned int cpu,
				  struct cpufreq_policy *policy,
				  struct device *dev)
{
	int ret = 0;
#ifdef CONFIG_SMP
	unsigned long irq_flags;
	unsigned int sibling;
#ifdef CONFIG_HOTPLUG_CPU
	struct cpufreq_governor *saved_gov;

	saved_gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
	if (saved_gov) {
		policy->governor = saved_gov;
		pr_debug("Restoring governor %s for cpu %d\n",
		       policy->governor->name, cpu);
	}
#endif

	for_each_cpu(sibling, policy->cpus) {
		struct cpufreq_policy *owner;

		if (sibling == cpu)
			continue;

		/*
		 * Look for a sibling that already has a policy; it may not
		 * know about this CPU because of CPU hotplug. The reference
		 * taken here is dropped when the device is removed in
		 * __cpufreq_remove_dev().
		 */
		owner = cpufreq_cpu_get(sibling);
		if (likely(!owner))
			continue;

		/* Switch this CPU over to the owner's rwsem. */
		unlock_policy_rwsem_write(cpu);
		per_cpu(cpufreq_policy_cpu, cpu) = owner->cpu;

		if (lock_policy_rwsem_write(cpu) < 0) {
			/* Should not go through policy unlock path */
			if (cpufreq_driver->exit)
				cpufreq_driver->exit(policy);
			cpufreq_cpu_put(owner);
			return -EBUSY;
		}

		spin_lock_irqsave(&cpufreq_driver_lock, irq_flags);
		cpumask_copy(owner->cpus, policy->cpus);
		per_cpu(cpufreq_cpu_data, cpu) = owner;
		spin_unlock_irqrestore(&cpufreq_driver_lock, irq_flags);

		pr_debug("CPU already managed, adding link\n");
		ret = sysfs_create_link(&dev->kobj, &owner->kobj, "cpufreq");
		if (ret)
			cpufreq_cpu_put(owner);

		/*
		 * We only needed to be added to the mask. Call
		 * driver->exit() because only the cpu parent of the kobj
		 * needed to call init().
		 */
		if (cpufreq_driver->exit)
			cpufreq_driver->exit(policy);

		return ret ? ret : 1;
	}
#endif
	return ret;
}
782
783
784 /* symlink affected CPUs */
785 static int cpufreq_add_dev_symlink(unsigned int cpu,
786                                    struct cpufreq_policy *policy)
787 {
788         unsigned int j;
789         int ret = 0;
790
791         for_each_cpu(j, policy->cpus) {
792                 struct cpufreq_policy *managed_policy;
793                 struct device *cpu_dev;
794
795                 if (j == cpu)
796                         continue;
797                 if (!cpu_online(j))
798                         continue;
799
800                 pr_debug("CPU %u already managed, adding link\n", j);
801                 managed_policy = cpufreq_cpu_get(cpu);
802                 cpu_dev = get_cpu_device(j);
803                 ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
804                                         "cpufreq");
805                 if (ret) {
806                         cpufreq_cpu_put(managed_policy);
807                         return ret;
808                 }
809         }
810         return ret;
811 }
812
813 static int cpufreq_add_dev_interface(unsigned int cpu,
814                                      struct cpufreq_policy *policy,
815                                      struct device *dev)
816 {
817         struct cpufreq_policy new_policy;
818         struct freq_attr **drv_attr;
819         unsigned long flags;
820         int ret = 0;
821         unsigned int j;
822
823         /* prepare interface data */
824         ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
825                                    &dev->kobj, "cpufreq");
826         if (ret)
827                 return ret;
828
829         /* set up files for this cpu device */
830         drv_attr = cpufreq_driver->attr;
831         while ((drv_attr) && (*drv_attr)) {
832                 ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
833                 if (ret)
834                         goto err_out_kobj_put;
835                 drv_attr++;
836         }
837         if (cpufreq_driver->get) {
838                 ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
839                 if (ret)
840                         goto err_out_kobj_put;
841         }
842         if (cpufreq_driver->target) {
843                 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
844                 if (ret)
845                         goto err_out_kobj_put;
846         }
847         if (cpufreq_driver->bios_limit) {
848                 ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
849                 if (ret)
850                         goto err_out_kobj_put;
851         }
852
853         spin_lock_irqsave(&cpufreq_driver_lock, flags);
854         for_each_cpu(j, policy->cpus) {
855                 if (!cpu_online(j))
856                         continue;
857                 per_cpu(cpufreq_cpu_data, j) = policy;
858                 per_cpu(cpufreq_policy_cpu, j) = policy->cpu;
859         }
860         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
861
862         ret = cpufreq_add_dev_symlink(cpu, policy);
863         if (ret)
864                 goto err_out_kobj_put;
865
866         memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
867         /* assure that the starting sequence is run in __cpufreq_set_policy */
868         policy->governor = NULL;
869
870         /* set default policy */
871         ret = __cpufreq_set_policy(policy, &new_policy);
872         policy->user_policy.policy = policy->policy;
873         policy->user_policy.governor = policy->governor;
874
875         if (ret) {
876                 pr_debug("setting policy failed\n");
877                 if (cpufreq_driver->exit)
878                         cpufreq_driver->exit(policy);
879         }
880         return ret;
881
882 err_out_kobj_put:
883         kobject_put(&policy->kobj);
884         wait_for_completion(&policy->kobj_unregister);
885         return ret;
886 }
887
888
889 /**
890  * cpufreq_add_dev - add a CPU device
891  *
892  * Adds the cpufreq interface for a CPU device.
893  *
894  * The Oracle says: try running cpufreq registration/unregistration concurrently
895  * with cpu hotplugging and all hell will break loose. Tried to clean this
896  * mess up, but more thorough testing is needed. - Mathieu
 *
 * Returns 0 when the CPU is offline, when (CONFIG_SMP) a policy already
 * exists for it, when cpufreq_add_dev_policy() reports a positive value,
 * or on full success.  Returns -EINVAL if the driver module cannot be
 * pinned, -ENOMEM if the policy or one of its cpumasks cannot be allocated,
 * and otherwise the error of the driver ->init() callback,
 * cpufreq_add_dev_policy() or cpufreq_add_dev_interface().
897  */
898 static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
899 {
900         unsigned int cpu = dev->id;
901         int ret = 0, found = 0;
902         struct cpufreq_policy *policy;
903         unsigned long flags;
904         unsigned int j;
905 #ifdef CONFIG_HOTPLUG_CPU
906         int sibling;
907 #endif
908
909         if (cpu_is_offline(cpu))
910                 return 0;
911
912         pr_debug("adding CPU %u\n", cpu);
913
914 #ifdef CONFIG_SMP
915         /* check whether a different CPU already registered this
916          * CPU because it is in the same boat. */
917         policy = cpufreq_cpu_get(cpu);
918         if (unlikely(policy)) {
919                 cpufreq_cpu_put(policy);
920                 return 0;
921         }
922 #endif
923
        /* pin the driver module; the reference is dropped again on both
         * the success path and the error path of this function */
924         if (!try_module_get(cpufreq_driver->owner)) {
925                 ret = -EINVAL;
926                 goto module_out;
927         }
928
        /* default error code for the three allocations below */
929         ret = -ENOMEM;
930         policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
931         if (!policy)
932                 goto nomem_out;
933
934         if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
935                 goto err_free_policy;
936
937         if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
938                 goto err_free_cpumask;
939
940         policy->cpu = cpu;
941         cpumask_copy(policy->cpus, cpumask_of(cpu));
942
943         /* Initially set CPU itself as the policy_cpu */
944         per_cpu(cpufreq_policy_cpu, cpu) = cpu;
        /* a failure to take the rwsem is only warned about, not handled */
945         ret = (lock_policy_rwsem_write(cpu) < 0);
946         WARN_ON(ret);
947
948         init_completion(&policy->kobj_unregister);
949         INIT_WORK(&policy->update, handle_update);
950
951         /* Set governor before ->init, so that driver could check it */
952 #ifdef CONFIG_HOTPLUG_CPU
        /* reuse the governor of an online CPU whose related_cpus mask
         * contains this CPU, if there is one */
953         for_each_online_cpu(sibling) {
954                 struct cpufreq_policy *cp = per_cpu(cpufreq_cpu_data, sibling);
955                 if (cp && cp->governor &&
956                     (cpumask_test_cpu(cpu, cp->related_cpus))) {
957                         policy->governor = cp->governor;
958                         found = 1;
959                         break;
960                 }
961         }
962 #endif
963         if (!found)
964                 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
965         /* call driver. From then on the cpufreq must be able
966          * to accept all calls to ->verify and ->setpolicy for this CPU
967          */
968         ret = cpufreq_driver->init(policy);
969         if (ret) {
970                 pr_debug("initialization failed\n");
971                 goto err_unlock_policy;
972         }
973         policy->user_policy.min = policy->min;
974         policy->user_policy.max = policy->max;
975
976         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
977                                      CPUFREQ_START, policy);
978
979         ret = cpufreq_add_dev_policy(cpu, policy, dev);
980         if (ret) {
981                 if (ret > 0)
982                         /* This is a managed cpu, symlink created,
983                            exit with 0 */
984                         ret = 0;
                /* the error labels are also used for the "managed cpu"
                 * case: the policy allocated above is freed and the
                 * function returns the (possibly zeroed) ret */
985                 goto err_unlock_policy;
986         }
987
988         ret = cpufreq_add_dev_interface(cpu, policy, dev);
989         if (ret)
990                 goto err_out_unregister;
991
992         unlock_policy_rwsem_write(cpu);
993
994         kobject_uevent(&policy->kobj, KOBJ_ADD);
995         module_put(cpufreq_driver->owner);
996         pr_debug("initialization complete\n");
997
998         return 0;
999
1000
        /* error unwinding: each label undoes one more setup step and
         * falls through to the labels below it */
1001 err_out_unregister:
1002         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1003         for_each_cpu(j, policy->cpus)
1004                 per_cpu(cpufreq_cpu_data, j) = NULL;
1005         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1006
        /* drop the kobject and wait until its release has signalled
         * kobj_unregister before the policy memory is freed */
1007         kobject_put(&policy->kobj);
1008         wait_for_completion(&policy->kobj_unregister);
1009
1010 err_unlock_policy:
1011         unlock_policy_rwsem_write(cpu);
1012         free_cpumask_var(policy->related_cpus);
1013 err_free_cpumask:
1014         free_cpumask_var(policy->cpus);
1015 err_free_policy:
1016         kfree(policy);
1017 nomem_out:
1018         module_put(cpufreq_driver->owner);
1019 module_out:
1020         return ret;
1021 }
1022
1023
1024 /**
1025  * __cpufreq_remove_dev - remove a CPU device
1026  *
1027  * Removes the cpufreq interface for a CPU device.
1028  * Caller should already have policy_rwsem in write mode for this CPU.
1029  * This routine frees the rwsem before returning.
1030  */
1031 static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1032 {
1033         unsigned int cpu = dev->id;
1034         unsigned long flags;
1035         struct cpufreq_policy *data;
1036         struct kobject *kobj;
1037         struct completion *cmp;
1038 #ifdef CONFIG_SMP
1039         struct device *cpu_dev;
1040         unsigned int j;
1041 #endif
1042
1043         pr_debug("unregistering CPU %u\n", cpu);
1044
1045         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1046         data = per_cpu(cpufreq_cpu_data, cpu);
1047
1048         if (!data) {
1049                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1050                 unlock_policy_rwsem_write(cpu);
1051                 return -EINVAL;
1052         }
1053         per_cpu(cpufreq_cpu_data, cpu) = NULL;
1054
1055
1056 #ifdef CONFIG_SMP
1057         /* if this isn't the CPU which is the parent of the kobj, we
1058          * only need to unlink, put and exit
1059          */
1060         if (unlikely(cpu != data->cpu)) {
1061                 pr_debug("removing link\n");
1062                 cpumask_clear_cpu(cpu, data->cpus);
1063                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1064                 kobj = &dev->kobj;
1065                 cpufreq_cpu_put(data);
1066                 unlock_policy_rwsem_write(cpu);
1067                 sysfs_remove_link(kobj, "cpufreq");
1068                 return 0;
1069         }
1070 #endif
1071
1072 #ifdef CONFIG_SMP
1073
1074 #ifdef CONFIG_HOTPLUG_CPU
1075         strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
1076                         CPUFREQ_NAME_LEN);
1077 #endif
1078
1079         /* if we have other CPUs still registered, we need to unlink them,
1080          * or else wait_for_completion below will lock up. Clean the
1081          * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
1082          * the sysfs links afterwards.
1083          */
1084         if (unlikely(cpumask_weight(data->cpus) > 1)) {
1085                 for_each_cpu(j, data->cpus) {
1086                         if (j == cpu)
1087                                 continue;
1088                         per_cpu(cpufreq_cpu_data, j) = NULL;
1089                 }
1090         }
1091
1092         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1093
1094         if (unlikely(cpumask_weight(data->cpus) > 1)) {
1095                 for_each_cpu(j, data->cpus) {
1096                         if (j == cpu)
1097                                 continue;
1098                         pr_debug("removing link for cpu %u\n", j);
1099 #ifdef CONFIG_HOTPLUG_CPU
1100                         strncpy(per_cpu(cpufreq_cpu_governor, j),
1101                                 data->governor->name, CPUFREQ_NAME_LEN);
1102 #endif
1103                         cpu_dev = get_cpu_device(j);
1104                         kobj = &cpu_dev->kobj;
1105                         unlock_policy_rwsem_write(cpu);
1106                         sysfs_remove_link(kobj, "cpufreq");
1107                         lock_policy_rwsem_write(cpu);
1108                         cpufreq_cpu_put(data);
1109                 }
1110         }
1111 #else
1112         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1113 #endif
1114
1115         if (cpufreq_driver->target)
1116                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1117
1118         kobj = &data->kobj;
1119         cmp = &data->kobj_unregister;
1120         unlock_policy_rwsem_write(cpu);
1121         kobject_put(kobj);
1122
1123         /* we need to make sure that the underlying kobj is actually
1124          * not referenced anymore by anybody before we proceed with
1125          * unloading.
1126          */
1127         pr_debug("waiting for dropping of refcount\n");
1128         wait_for_completion(cmp);
1129         pr_debug("wait complete\n");
1130
1131         lock_policy_rwsem_write(cpu);
1132         if (cpufreq_driver->exit)
1133                 cpufreq_driver->exit(data);
1134         unlock_policy_rwsem_write(cpu);
1135
1136 #ifdef CONFIG_HOTPLUG_CPU
1137         /* when the CPU which is the parent of the kobj is hotplugged
1138          * offline, check for siblings, and create cpufreq sysfs interface
1139          * and symlinks
1140          */
1141         if (unlikely(cpumask_weight(data->cpus) > 1)) {
1142                 /* first sibling now owns the new sysfs dir */
1143                 cpumask_clear_cpu(cpu, data->cpus);
1144                 cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);
1145
1146                 /* finally remove our own symlink */
1147                 lock_policy_rwsem_write(cpu);
1148                 __cpufreq_remove_dev(dev, sif);
1149         }
1150 #endif
1151
1152         free_cpumask_var(data->related_cpus);
1153         free_cpumask_var(data->cpus);
1154         kfree(data);
1155
1156         return 0;
1157 }
1158
1159
1160 static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1161 {
1162         unsigned int cpu = dev->id;
1163         int retval;
1164
1165         if (cpu_is_offline(cpu))
1166                 return 0;
1167
1168         if (unlikely(lock_policy_rwsem_write(cpu)))
1169                 BUG();
1170
1171         retval = __cpufreq_remove_dev(dev, sif);
1172         return retval;
1173 }
1174
1175
1176 static void handle_update(struct work_struct *work)
1177 {
1178         struct cpufreq_policy *policy =
1179                 container_of(work, struct cpufreq_policy, update);
1180         unsigned int cpu = policy->cpu;
1181         pr_debug("handle_update for cpu %u called\n", cpu);
1182         cpufreq_update_policy(cpu);
1183 }
1184
1185 /**
1186  *      cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1187  *      @cpu: cpu number
1188  *      @old_freq: CPU frequency the kernel thinks the CPU runs at
1189  *      @new_freq: CPU frequency the CPU actually runs at
1190  *
1191  *      We adjust to current frequency first, and need to clean up later.
1192  *      So either call to cpufreq_update_policy() or schedule handle_update()).
1193  */
1194 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1195                                 unsigned int new_freq)
1196 {
1197         struct cpufreq_freqs freqs;
1198
1199         pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
1200                "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1201
1202         freqs.cpu = cpu;
1203         freqs.old = old_freq;
1204         freqs.new = new_freq;
1205         cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1206         cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1207 }
1208
1209
1210 /**
1211  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1212  * @cpu: CPU number
1213  *
1214  * This is the last known freq, without actually getting it from the driver.
1215  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1216  */
1217 unsigned int cpufreq_quick_get(unsigned int cpu)
1218 {
1219         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1220         unsigned int ret_freq = 0;
1221
1222         if (policy) {
1223                 ret_freq = policy->cur;
1224                 cpufreq_cpu_put(policy);
1225         }
1226
1227         return ret_freq;
1228 }
1229 EXPORT_SYMBOL(cpufreq_quick_get);
1230
1231 /**
1232  * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
1233  * @cpu: CPU number
1234  *
1235  * Just return the max possible frequency for a given CPU.
1236  */
1237 unsigned int cpufreq_quick_get_max(unsigned int cpu)
1238 {
1239         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1240         unsigned int ret_freq = 0;
1241
1242         if (policy) {
1243                 ret_freq = policy->max;
1244                 cpufreq_cpu_put(policy);
1245         }
1246
1247         return ret_freq;
1248 }
1249 EXPORT_SYMBOL(cpufreq_quick_get_max);
1250
1251
1252 static unsigned int __cpufreq_get(unsigned int cpu)
1253 {
1254         struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1255         unsigned int ret_freq = 0;
1256
1257         if (!cpufreq_driver->get)
1258                 return ret_freq;
1259
1260         ret_freq = cpufreq_driver->get(cpu);
1261
1262         if (ret_freq && policy->cur &&
1263                 !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1264                 /* verify no discrepancy between actual and
1265                                         saved value exists */
1266                 if (unlikely(ret_freq != policy->cur)) {
1267                         cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1268                         schedule_work(&policy->update);
1269                 }
1270         }
1271
1272         return ret_freq;
1273 }
1274
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Takes a reference on the policy of @cpu and its policy rwsem in read
 * mode, then queries the frequency through __cpufreq_get().  Returns 0 if
 * there is no policy or the rwsem cannot be taken.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
        unsigned int freq;

        if (!policy)
                return 0;

        if (unlikely(lock_policy_rwsem_read(cpu))) {
                cpufreq_cpu_put(policy);
                return 0;
        }

        freq = __cpufreq_get(cpu);
        unlock_policy_rwsem_read(cpu);

        cpufreq_cpu_put(policy);
        return freq;
}
EXPORT_SYMBOL(cpufreq_get);
1302
/*
 * Subsystem interface named "cpufreq" on cpu_subsys: cpufreq_add_dev() and
 * cpufreq_remove_dev() are installed as its add and remove callbacks.
 */
1303 static struct subsys_interface cpufreq_interface = {
1304         .name           = "cpufreq",
1305         .subsys         = &cpu_subsys,
1306         .add_dev        = cpufreq_add_dev,
1307         .remove_dev     = cpufreq_remove_dev,
1308 };
1309
1310
1311 /**
1312  * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
1313  *
1314  * This function is only executed for the boot processor.  The other CPUs
1315  * have been put offline by means of CPU hotplug.
1316  */
1317 static int cpufreq_bp_suspend(void)
1318 {
1319         int ret = 0;
1320
1321         int cpu = smp_processor_id();
1322         struct cpufreq_policy *cpu_policy;
1323
1324         pr_debug("suspending cpu %u\n", cpu);
1325
1326         /* If there's no policy for the boot CPU, we have nothing to do. */
1327         cpu_policy = cpufreq_cpu_get(cpu);
1328         if (!cpu_policy)
1329                 return 0;
1330
1331         if (cpufreq_driver->suspend) {
1332                 ret = cpufreq_driver->suspend(cpu_policy);
1333                 if (ret)
1334                         printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1335                                         "step on CPU %u\n", cpu_policy->cpu);
1336         }
1337
1338         cpufreq_cpu_put(cpu_policy);
1339         return ret;
1340 }
1341
1342 /**
1343  * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
1344  *
1345  *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1346  *      2.) schedule call cpufreq_update_policy() ASAP as interrupts are
1347  *          restored. It will verify that the current freq is in sync with
1348  *          what we believe it to be. This is a bit later than when it
1349  *          should be, but nonethteless it's better than calling
1350  *          cpufreq_driver->get() here which might re-enable interrupts...
1351  *
1352  * This function is only executed for the boot CPU.  The other CPUs have not
1353  * been turned on yet.
1354  */
1355 static void cpufreq_bp_resume(void)
1356 {
1357         int ret = 0;
1358
1359         int cpu = smp_processor_id();
1360         struct cpufreq_policy *cpu_policy;
1361
1362         pr_debug("resuming cpu %u\n", cpu);
1363
1364         /* If there's no policy for the boot CPU, we have nothing to do. */
1365         cpu_policy = cpufreq_cpu_get(cpu);
1366         if (!cpu_policy)
1367                 return;
1368
1369         if (cpufreq_driver->resume) {
1370                 ret = cpufreq_driver->resume(cpu_policy);
1371                 if (ret) {
1372                         printk(KERN_ERR "cpufreq: resume failed in ->resume "
1373                                         "step on CPU %u\n", cpu_policy->cpu);
1374                         goto fail;
1375                 }
1376         }
1377
1378         schedule_work(&cpu_policy->update);
1379
1380 fail:
1381         cpufreq_cpu_put(cpu_policy);
1382 }
1383
/*
 * Syscore operations of the cpufreq core: cpufreq_bp_suspend() and
 * cpufreq_bp_resume() are installed as the suspend and resume callbacks.
 */
1384 static struct syscore_ops cpufreq_syscore_ops = {
1385         .suspend        = cpufreq_bp_suspend,
1386         .resume         = cpufreq_bp_resume,
1387 };
1388
1389
1390 /*********************************************************************
1391  *                     NOTIFIER LISTS INTERFACE                      *
1392  *********************************************************************/
1393
1394 /**
1395  *      cpufreq_register_notifier - register a driver with cpufreq
1396  *      @nb: notifier function to register
1397  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1398  *
1399  *      Add a driver to one of two lists: either a list of drivers that
1400  *      are notified about clock rate changes (once before and once after
1401  *      the transition), or a list of drivers that are notified about
1402  *      changes in cpufreq policy.
1403  *
1404  *      This function may sleep, and has the same return conditions as
1405  *      blocking_notifier_chain_register.
1406  */
1407 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1408 {
1409         int ret;
1410
1411         WARN_ON(!init_cpufreq_transition_notifier_list_called);
1412
1413         switch (list) {
1414         case CPUFREQ_TRANSITION_NOTIFIER:
1415                 ret = srcu_notifier_chain_register(
1416                                 &cpufreq_transition_notifier_list, nb);
1417                 break;
1418         case CPUFREQ_POLICY_NOTIFIER:
1419                 ret = blocking_notifier_chain_register(
1420                                 &cpufreq_policy_notifier_list, nb);
1421                 break;
1422         default:
1423                 ret = -EINVAL;
1424         }
1425
1426         return ret;
1427 }
1428 EXPORT_SYMBOL(cpufreq_register_notifier);
1429
1430
1431 /**
1432  *      cpufreq_unregister_notifier - unregister a driver with cpufreq
1433  *      @nb: notifier block to be unregistered
1434  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1435  *
1436  *      Remove a driver from the CPU frequency notifier list.
1437  *
1438  *      This function may sleep, and has the same return conditions as
1439  *      blocking_notifier_chain_unregister.
1440  */
1441 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1442 {
1443         int ret;
1444
1445         switch (list) {
1446         case CPUFREQ_TRANSITION_NOTIFIER:
1447                 ret = srcu_notifier_chain_unregister(
1448                                 &cpufreq_transition_notifier_list, nb);
1449                 break;
1450         case CPUFREQ_POLICY_NOTIFIER:
1451                 ret = blocking_notifier_chain_unregister(
1452                                 &cpufreq_policy_notifier_list, nb);
1453                 break;
1454         default:
1455                 ret = -EINVAL;
1456         }
1457
1458         return ret;
1459 }
1460 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1461
1462
1463 /*********************************************************************
1464  *                              GOVERNORS                            *
1465  *********************************************************************/
1466
1467
1468 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1469                             unsigned int target_freq,
1470                             unsigned int relation)
1471 {
1472         int retval = -EINVAL;
1473         unsigned int old_target_freq = target_freq;
1474
1475         if (cpufreq_disabled())
1476                 return -ENODEV;
1477
1478         /* Make sure that target_freq is within supported range */
1479         if (target_freq > policy->max)
1480                 target_freq = policy->max;
1481         if (target_freq < policy->min)
1482                 target_freq = policy->min;
1483
1484         pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
1485                         policy->cpu, target_freq, relation, old_target_freq);
1486
1487         if (target_freq == policy->cur)
1488                 return 0;
1489
1490         if (cpu_online(policy->cpu) && cpufreq_driver->target)
1491                 retval = cpufreq_driver->target(policy, target_freq, relation);
1492
1493         return retval;
1494 }
1495 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1496
1497 int cpufreq_driver_target(struct cpufreq_policy *policy,
1498                           unsigned int target_freq,
1499                           unsigned int relation)
1500 {
1501         int ret = -EINVAL;
1502
1503         policy = cpufreq_cpu_get(policy->cpu);
1504         if (!policy)
1505                 goto no_policy;
1506
1507         if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1508                 goto fail;
1509
1510         ret = __cpufreq_driver_target(policy, target_freq, relation);
1511
1512         unlock_policy_rwsem_write(policy->cpu);
1513
1514 fail:
1515         cpufreq_cpu_put(policy);
1516 no_policy:
1517         return ret;
1518 }
1519 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1520
1521 int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1522 {
1523         int ret = 0;
1524
1525         if (!(cpu_online(cpu) && cpufreq_driver->getavg))
1526                 return 0;
1527
1528         policy = cpufreq_cpu_get(policy->cpu);
1529         if (!policy)
1530                 return -EINVAL;
1531
1532         ret = cpufreq_driver->getavg(policy, cpu);
1533
1534         cpufreq_cpu_put(policy);
1535         return ret;
1536 }
1537 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1538
1539 /*
1540  * when "event" is CPUFREQ_GOV_LIMITS
1541  */
1542
1543 static int __cpufreq_governor(struct cpufreq_policy *policy,
1544                                         unsigned int event)
1545 {
1546         int ret;
1547
1548         /* Only must be defined when default governor is known to have latency
1549            restrictions, like e.g. conservative or ondemand.
1550            That this is the case is already ensured in Kconfig
1551         */
1552 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1553         struct cpufreq_governor *gov = &cpufreq_gov_performance;
1554 #else
1555         struct cpufreq_governor *gov = NULL;
1556 #endif
1557
1558         if (policy->governor->max_transition_latency &&
1559             policy->cpuinfo.transition_latency >
1560             policy->governor->max_transition_latency) {
1561                 if (!gov)
1562                         return -EINVAL;
1563                 else {
1564                         printk(KERN_WARNING "%s governor failed, too long"
1565                                " transition latency of HW, fallback"
1566                                " to %s governor\n",
1567                                policy->governor->name,
1568                                gov->name);
1569                         policy->governor = gov;
1570                 }
1571         }
1572
1573         if (!try_module_get(policy->governor->owner))
1574                 return -EINVAL;
1575
1576         pr_debug("__cpufreq_governor for CPU %u, event %u\n",
1577                                                 policy->cpu, event);
1578         ret = policy->governor->governor(policy, event);
1579
1580         /* we keep one module reference alive for
1581                         each CPU governed by this CPU */
1582         if ((event != CPUFREQ_GOV_START) || ret)
1583                 module_put(policy->governor->owner);
1584         if ((event == CPUFREQ_GOV_STOP) && !ret)
1585                 module_put(policy->governor->owner);
1586
1587         return ret;
1588 }
1589
1590
1591 int cpufreq_register_governor(struct cpufreq_governor *governor)
1592 {
1593         int err;
1594
1595         if (!governor)
1596                 return -EINVAL;
1597
1598         if (cpufreq_disabled())
1599                 return -ENODEV;
1600
1601         mutex_lock(&cpufreq_governor_mutex);
1602
1603         err = -EBUSY;
1604         if (__find_governor(governor->name) == NULL) {
1605                 err = 0;
1606                 list_add(&governor->governor_list, &cpufreq_governor_list);
1607         }
1608
1609         mutex_unlock(&cpufreq_governor_mutex);
1610         return err;
1611 }
1612 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1613
1614
1615 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1616 {
1617 #ifdef CONFIG_HOTPLUG_CPU
1618         int cpu;
1619 #endif
1620
1621         if (!governor)
1622                 return;
1623
1624         if (cpufreq_disabled())
1625                 return;
1626
1627 #ifdef CONFIG_HOTPLUG_CPU
1628         for_each_present_cpu(cpu) {
1629                 if (cpu_online(cpu))
1630                         continue;
1631                 if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
1632                         strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
1633         }
1634 #endif
1635
1636         mutex_lock(&cpufreq_governor_mutex);
1637         list_del(&governor->governor_list);
1638         mutex_unlock(&cpufreq_governor_mutex);
1639         return;
1640 }
1641 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1642
1643
1644
1645 /*********************************************************************
1646  *                          POLICY INTERFACE                         *
1647  *********************************************************************/
1648
1649 /**
1650  * cpufreq_get_policy - get the current cpufreq_policy
1651  * @policy: struct cpufreq_policy into which the current cpufreq_policy
1652  *      is written
1653  *
1654  * Reads the current cpufreq policy.
1655  */
1656 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1657 {
1658         struct cpufreq_policy *cpu_policy;
1659         if (!policy)
1660                 return -EINVAL;
1661
1662         cpu_policy = cpufreq_cpu_get(cpu);
1663         if (!cpu_policy)
1664                 return -EINVAL;
1665
1666         memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1667
1668         cpufreq_cpu_put(cpu_policy);
1669         return 0;
1670 }
1671 EXPORT_SYMBOL(cpufreq_get_policy);
1672
1673
1674 /*
 * __cpufreq_set_policy - validate a new policy and apply it to the current one
 *
1675  * data   : current policy.
1676  * policy : policy to be set.
 *
 * The new limits are verified by the driver, passed through the policy
 * notifier chain (ADJUST, INCOMPATIBLE), verified again and announced
 * (NOTIFY) before data->min / data->max are updated.  Drivers with a
 * ->setpolicy callback are then handed the new policy; for all others the
 * governor is switched if it differs and told about the new limits.
 *
 * Returns 0 on success, -EINVAL if the new range does not overlap the
 * current one or the new governor cannot be started, or the error of the
 * driver ->verify / ->setpolicy callback.
1677  */
1678 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1679                                 struct cpufreq_policy *policy)
1680 {
1681         int ret = 0;
1682
1683         pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1684                 policy->min, policy->max);
1685
        /* the new policy always carries the cpuinfo of the current one */
1686         memcpy(&policy->cpuinfo, &data->cpuinfo,
1687                                 sizeof(struct cpufreq_cpuinfo));
1688
        /* reject a range that lies entirely outside the current limits */
1689         if (policy->min > data->max || policy->max < data->min) {
1690                 ret = -EINVAL;
1691                 goto error_out;
1692         }
1693
1694         /* verify the cpu speed can be set within this limit */
1695         ret = cpufreq_driver->verify(policy);
1696         if (ret)
1697                 goto error_out;
1698
1699         /* adjust if necessary - all reasons */
1700         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1701                         CPUFREQ_ADJUST, policy);
1702
1703         /* adjust if necessary - hardware incompatibility*/
1704         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1705                         CPUFREQ_INCOMPATIBLE, policy);
1706
1707         /* verify the cpu speed can be set within this limit,
1708            which might be different to the first one */
1709         ret = cpufreq_driver->verify(policy);
1710         if (ret)
1711                 goto error_out;
1712
1713         /* notification of the new policy */
1714         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1715                         CPUFREQ_NOTIFY, policy);
1716
1717         data->min = policy->min;
1718         data->max = policy->max;
1719
1720         pr_debug("new min and max freqs are %u - %u kHz\n",
1721                                         data->min, data->max);
1722
1723         if (cpufreq_driver->setpolicy) {
                /* the driver is handed the new policy; data->governor is
                 * left untouched on this branch */
1724                 data->policy = policy->policy;
1725                 pr_debug("setting range\n");
1726                 ret = cpufreq_driver->setpolicy(policy);
1727         } else {
1728                 if (policy->governor != data->governor) {
1729                         /* save old, working values */
1730                         struct cpufreq_governor *old_gov = data->governor;
1731
1732                         pr_debug("governor switch\n");
1733
1734                         /* end old governor */
1735                         if (data->governor)
1736                                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1737
1738                         /* start new governor */
1739                         data->governor = policy->governor;
1740                         if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1741                                 /* new governor failed, so re-start old one */
1742                                 pr_debug("starting governor %s failed\n",
1743                                                         data->governor->name);
                                /* the result of restarting the old governor
                                 * is not checked */
1744                                 if (old_gov) {
1745                                         data->governor = old_gov;
1746                                         __cpufreq_governor(data,
1747                                                            CPUFREQ_GOV_START);
1748                                 }
1749                                 ret = -EINVAL;
1750                                 goto error_out;
1751                         }
1752                         /* might be a policy change, too, so fall through */
1753                 }
1754                 pr_debug("governor: change or update limits\n");
                /* the result of the LIMITS event does not affect ret */
1755                 __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1756         }
1757
1758 error_out:
1759         return ret;
1760 }
1761
1762 /**
1763  *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
1764  *      @cpu: CPU which shall be re-evaluated
1765  *
1766  *      Useful for policy notifiers which have different necessities
1767  *      at different times.
1768  */
1769 int cpufreq_update_policy(unsigned int cpu)
1770 {
1771         struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1772         struct cpufreq_policy policy;
1773         int ret;
1774
1775         if (!data) {
1776                 ret = -ENODEV;
1777                 goto no_policy;
1778         }
1779
1780         if (unlikely(lock_policy_rwsem_write(cpu))) {
1781                 ret = -EINVAL;
1782                 goto fail;
1783         }
1784
1785         pr_debug("updating policy for CPU %u\n", cpu);
1786         memcpy(&policy, data, sizeof(struct cpufreq_policy));
1787         policy.min = data->user_policy.min;
1788         policy.max = data->user_policy.max;
1789         policy.policy = data->user_policy.policy;
1790         policy.governor = data->user_policy.governor;
1791
1792         /* BIOS might change freq behind our back
1793           -> ask driver for current freq and notify governors about a change */
1794         if (cpufreq_driver->get) {
1795                 policy.cur = cpufreq_driver->get(cpu);
1796                 if (!data->cur) {
1797                         pr_debug("Driver did not initialize current freq");
1798                         data->cur = policy.cur;
1799                 } else {
1800                         if (data->cur != policy.cur)
1801                                 cpufreq_out_of_sync(cpu, data->cur,
1802                                                                 policy.cur);
1803                 }
1804         }
1805
1806         ret = __cpufreq_set_policy(data, &policy);
1807
1808         unlock_policy_rwsem_write(cpu);
1809
1810 fail:
1811         cpufreq_cpu_put(data);
1812 no_policy:
1813         return ret;
1814 }
1815 EXPORT_SYMBOL(cpufreq_update_policy);
1816
1817 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1818                                         unsigned long action, void *hcpu)
1819 {
1820         unsigned int cpu = (unsigned long)hcpu;
1821         struct device *dev;
1822
1823         dev = get_cpu_device(cpu);
1824         if (dev) {
1825                 switch (action) {
1826                 case CPU_ONLINE:
1827                 case CPU_ONLINE_FROZEN:
1828                         cpufreq_add_dev(dev, NULL);
1829                         break;
1830                 case CPU_DOWN_PREPARE:
1831                 case CPU_DOWN_PREPARE_FROZEN:
1832                         if (unlikely(lock_policy_rwsem_write(cpu)))
1833                                 BUG();
1834
1835                         __cpufreq_remove_dev(dev, NULL);
1836                         break;
1837                 case CPU_DOWN_FAILED:
1838                 case CPU_DOWN_FAILED_FROZEN:
1839                         cpufreq_add_dev(dev, NULL);
1840                         break;
1841                 }
1842         }
1843         return NOTIFY_OK;
1844 }
1845
1846 static struct notifier_block __refdata cpufreq_cpu_notifier = {
1847     .notifier_call = cpufreq_cpu_callback,
1848 };
1849
1850 /*********************************************************************
1851  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1852  *********************************************************************/
1853
1854 /**
1855  * cpufreq_register_driver - register a CPU Frequency driver
1856  * @driver_data: A struct cpufreq_driver containing the values#
1857  * submitted by the CPU Frequency driver.
1858  *
1859  *   Registers a CPU Frequency driver to this core code. This code
1860  * returns zero on success, -EBUSY when another driver got here first
1861  * (and isn't unregistered in the meantime).
1862  *
1863  */
1864 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1865 {
1866         unsigned long flags;
1867         int ret;
1868
1869         if (cpufreq_disabled())
1870                 return -ENODEV;
1871
1872         if (!driver_data || !driver_data->verify || !driver_data->init ||
1873             ((!driver_data->setpolicy) && (!driver_data->target)))
1874                 return -EINVAL;
1875
1876         pr_debug("trying to register driver %s\n", driver_data->name);
1877
1878         if (driver_data->setpolicy)
1879                 driver_data->flags |= CPUFREQ_CONST_LOOPS;
1880
1881         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1882         if (cpufreq_driver) {
1883                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1884                 return -EBUSY;
1885         }
1886         cpufreq_driver = driver_data;
1887         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1888
1889         ret = subsys_interface_register(&cpufreq_interface);
1890         if (ret)
1891                 goto err_null_driver;
1892
1893         if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1894                 int i;
1895                 ret = -ENODEV;
1896
1897                 /* check for at least one working CPU */
1898                 for (i = 0; i < nr_cpu_ids; i++)
1899                         if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1900                                 ret = 0;
1901                                 break;
1902                         }
1903
1904                 /* if all ->init() calls failed, unregister */
1905                 if (ret) {
1906                         pr_debug("no CPU initialized for driver %s\n",
1907                                                         driver_data->name);
1908                         goto err_if_unreg;
1909                 }
1910         }
1911
1912         register_hotcpu_notifier(&cpufreq_cpu_notifier);
1913         pr_debug("driver %s up and running\n", driver_data->name);
1914
1915         return 0;
1916 err_if_unreg:
1917         subsys_interface_unregister(&cpufreq_interface);
1918 err_null_driver:
1919         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1920         cpufreq_driver = NULL;
1921         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1922         return ret;
1923 }
1924 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1925
1926
1927 /**
1928  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1929  *
1930  *    Unregister the current CPUFreq driver. Only call this if you have
1931  * the right to do so, i.e. if you have succeeded in initialising before!
1932  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1933  * currently not initialised.
1934  */
1935 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1936 {
1937         unsigned long flags;
1938
1939         if (!cpufreq_driver || (driver != cpufreq_driver))
1940                 return -EINVAL;
1941
1942         pr_debug("unregistering driver %s\n", driver->name);
1943
1944         subsys_interface_unregister(&cpufreq_interface);
1945         unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1946
1947         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1948         cpufreq_driver = NULL;
1949         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1950
1951         return 0;
1952 }
1953 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1954
1955 static int __init cpufreq_core_init(void)
1956 {
1957         int cpu;
1958
1959         if (cpufreq_disabled())
1960                 return -ENODEV;
1961
1962         for_each_possible_cpu(cpu) {
1963                 per_cpu(cpufreq_policy_cpu, cpu) = -1;
1964                 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1965         }
1966
1967         cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
1968         BUG_ON(!cpufreq_global_kobject);
1969         register_syscore_ops(&cpufreq_syscore_ops);
1970
1971         return 0;
1972 }
1973 core_initcall(cpufreq_core_init);