/*
* Copyright (c) 2012 NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
#include <linux/kernel.h>
#include <linux/cpuquiet.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/cpufreq.h>
#include <linux/pm_qos.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <asm/cputime.h>
#define CPUNAMELEN 8
typedef enum {
CPU_SPEED_BALANCED,
CPU_SPEED_BIASED,
CPU_SPEED_SKEWED,
} CPU_SPEED_BALANCE;
typedef enum {
IDLE,
DOWN,
UP,
} BALANCED_STATE;
struct idle_info {
u64 idle_last;
u64 last_timestamp;
u64 idle_current;
u64 timestamp;
};
static DEFINE_PER_CPU(struct idle_info, idleinfo);
static DEFINE_PER_CPU(unsigned int, cpu_load);
static struct timer_list load_timer;
static bool load_timer_active;
/* configurable parameters */
static unsigned int balance_level = 60;
static unsigned int idle_bottom_freq;
static unsigned int idle_top_freq;
static unsigned long up_delay;
static unsigned long down_delay;
static unsigned long last_change_time;
static unsigned int load_sample_rate = 20; /* msec */
static struct workqueue_struct *balanced_wq;
static struct delayed_work balanced_work;
static BALANCED_STATE balanced_state;
static struct kobject *balanced_kobject;
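
/*
 * Timer callback: compute each online CPU's load as the non-idle share
 * of the last sample window (load = 100 - idle%), then re-arm the timer
 * for load_sample_rate msec.
 */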
static void calculate_load_timer(unsigned long data)
{
int i;
u64 idle_time, elapsed_time;
if (!load_timer_active)
return;
for_each_online_cpu(i) {
struct idle_info *iinfo = &per_cpu(idleinfo, i);
unsigned int *load = &per_cpu(cpu_load, i);
iinfo->idle_last = iinfo->idle_current;
iinfo->last_timestamp = iinfo->timestamp;
iinfo->idle_current =
get_cpu_idle_time_us(i, &iinfo->timestamp);
elapsed_time = iinfo->timestamp - iinfo->last_timestamp;
idle_time = iinfo->idle_current - iinfo->idle_last;
idle_time *= 100;
do_div(idle_time, elapsed_time);
*load = 100 - idle_time;
}
mod_timer(&load_timer, jiffies + msecs_to_jiffies(load_sample_rate));
}
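
/* Snapshot the current idle counters and arm the load-sampling timer. */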
static void start_load_timer(void)
{
int i;
if (load_timer_active)
return;
load_timer_active = true;
for_each_online_cpu(i) {
struct idle_info *iinfo = &per_cpu(idleinfo, i);
iinfo->idle_current =
get_cpu_idle_time_us(i, &iinfo->timestamp);
}
mod_timer(&load_timer, jiffies + msecs_to_jiffies(100));
}
static void stop_load_timer(void)
{
if (!load_timer_active)
return;
load_timer_active = false;
del_timer(&load_timer);
}
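
/*
 * Return the online CPU (excluding CPU0) with the lowest sampled load,
 * or nr_cpu_ids if none is eligible.
 */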
static unsigned int get_slowest_cpu_n(void)
{
unsigned int cpu = nr_cpu_ids;
unsigned long minload = ULONG_MAX;
int i;
for_each_online_cpu(i) {
unsigned int *load = &per_cpu(cpu_load, i);
if ((i > 0) && (minload > *load)) {
cpu = i;
minload = *load;
}
}
return cpu;
}
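
/* Highest sampled load across all online CPUs. */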
static unsigned int cpu_highest_speed(void)
{
unsigned int maxload = 0;
int i;
for_each_online_cpu(i) {
unsigned int *load = &per_cpu(cpu_load, i);
maxload = max(maxload, *load);
}
return maxload;
}
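
/* Number of online CPUs whose sampled load is at or below @limit. */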
static unsigned int count_slow_cpus(unsigned int limit)
{
unsigned int cnt = 0;
int i;
for_each_online_cpu(i) {
unsigned int *load = &per_cpu(cpu_load, i);
if (*load <= limit)
cnt++;
}
return cnt;
}
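
/* rt profile thresholds are fixed point: 1 << NR_FSHIFT units per thread */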
#define NR_FSHIFT 2
static unsigned int rt_profile_sel;
static unsigned int core_bias; /* sysfs mirror of rt_profile_sel */
static unsigned int rt_profile_default[] = {
/* 1, 2, 3, 4 - on-line cpus target */
5, 9, 10, UINT_MAX
};
static unsigned int rt_profile_1[] = {
/* 1, 2, 3, 4 - on-line cpus target */
8, 9, 10, UINT_MAX
};
static unsigned int rt_profile_2[] = {
/* 1, 2, 3, 4 - on-line cpus target */
5, 13, 14, UINT_MAX
};
static unsigned int rt_profile_disable[] = {
/* 1, 2, 3, 4 - on-line cpus target */
0, 0, 0, UINT_MAX
};
static unsigned int *rt_profiles[] = {
rt_profile_default,
rt_profile_1,
rt_profile_2,
rt_profile_disable
};
static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */
static unsigned int nr_run_last;
struct runnables_avg_sample {
u64 previous_integral;
unsigned int avg;
bool integral_sampled;
u64 prev_timestamp;
};
static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);
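
/*
 * Sum, over the online CPUs, of the average number of runnable threads
 * in the last sample window, derived from the scheduler's nr_running
 * integral (FSHIFT fixed point).
 */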
static unsigned int get_avg_nr_runnables(void)
{
unsigned int i, sum = 0;
struct runnables_avg_sample *sample;
u64 integral, old_integral, delta_integral, delta_time, cur_time;
for_each_online_cpu(i) {
sample = &per_cpu(avg_nr_sample, i);
integral = nr_running_integral(i);
old_integral = sample->previous_integral;
sample->previous_integral = integral;
cur_time = ktime_to_ns(ktime_get());
delta_time = cur_time - sample->prev_timestamp;
sample->prev_timestamp = cur_time;
if (!sample->integral_sampled) {
sample->integral_sampled = true;
/* First sample to initialize prev_integral, skip
* avg calculation
*/
continue;
}
if (integral < old_integral) {
/* Overflow */
delta_integral = (ULLONG_MAX - old_integral) + integral;
} else {
delta_integral = integral - old_integral;
}
/* Calculate average for the previous sample window */
do_div(delta_integral, delta_time);
sample->avg = delta_integral;
sum += sample->avg;
}
return sum;
}
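
/*
 * Classify the current load picture: pick a target core count from the
 * selected runnable-thread profile, then report whether per-CPU speeds
 * justify another core (BALANCED), no change (BIASED) or removing one
 * (SKEWED).
 */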
static CPU_SPEED_BALANCE balanced_speed_balance(void)
{
unsigned long highest_speed = cpu_highest_speed();
unsigned long balanced_speed = highest_speed * balance_level / 100;
unsigned long skewed_speed = balanced_speed / 2;
unsigned int nr_cpus = num_online_cpus();
unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
unsigned int avg_nr_run = get_avg_nr_runnables();
unsigned int nr_run;
unsigned int *current_profile = rt_profiles[rt_profile_sel];
	/*
	 * balanced: freq targets for all CPUs are above balance_level
	 *           percent of the highest current speed
	 * biased:   freq target for at least one CPU is below the
	 *           balanced threshold
	 * skewed:   freq targets for at least 2 CPUs are below half the
	 *           balanced threshold
	 */
for (nr_run = 1; nr_run < ARRAY_SIZE(rt_profile_default); nr_run++) {
unsigned int nr_threshold = current_profile[nr_run - 1];
if (nr_run_last <= nr_run)
nr_threshold += nr_run_hysteresis;
if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT)))
break;
}
nr_run_last = nr_run;
if (count_slow_cpus(skewed_speed) >= 2 || nr_cpus > max_cpus ||
nr_run < nr_cpus)
return CPU_SPEED_SKEWED;
if (count_slow_cpus(balanced_speed) >= 1 || nr_cpus == max_cpus ||
nr_run <= nr_cpus)
return CPU_SPEED_BIASED;
return CPU_SPEED_BALANCED;
}
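
/* Governor state machine: decide which CPU, if any, to plug or unplug. */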
static void balanced_work_func(struct work_struct *work)
{
bool up = false;
unsigned int cpu = nr_cpu_ids;
unsigned long now = jiffies;
CPU_SPEED_BALANCE balance;
switch (balanced_state) {
case IDLE:
break;
case DOWN:
cpu = get_slowest_cpu_n();
if (cpu < nr_cpu_ids) {
up = false;
queue_delayed_work(balanced_wq,
&balanced_work, up_delay);
} else
stop_load_timer();
break;
case UP:
balance = balanced_speed_balance();
switch (balance) {
/* cpu speed is up and balanced - one more on-line */
case CPU_SPEED_BALANCED:
cpu = cpumask_next_zero(0, cpu_online_mask);
if (cpu < nr_cpu_ids)
up = true;
break;
/* cpu speed is up, but skewed - remove one core */
case CPU_SPEED_SKEWED:
cpu = get_slowest_cpu_n();
if (cpu < nr_cpu_ids)
up = false;
break;
/* cpu speed is up, but under-utilized - do nothing */
case CPU_SPEED_BIASED:
default:
break;
}
queue_delayed_work(
balanced_wq, &balanced_work, up_delay);
break;
default:
pr_err("%s: invalid cpuquiet balanced governor state %d\n",
__func__, balanced_state);
}
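
	/* rate-limit unplugging: honour down_delay since the last change */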
if (!up && ((now - last_change_time) < down_delay))
cpu = nr_cpu_ids;
if (cpu < nr_cpu_ids) {
last_change_time = now;
if (up)
cpuquiet_wake_cpu(cpu, false);
else
cpuquiet_quiesence_cpu(cpu, false);
}
}
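
/*
 * cpufreq transition notifier: move between IDLE, UP and DOWN based on
 * the new frequency relative to idle_top_freq and idle_bottom_freq.
 */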
static int balanced_cpufreq_transition(struct notifier_block *nb,
unsigned long state, void *data)
{
struct cpufreq_freqs *freqs = data;
unsigned long cpu_freq;
if (state == CPUFREQ_POSTCHANGE || state == CPUFREQ_RESUMECHANGE) {
cpu_freq = freqs->new;
switch (balanced_state) {
case IDLE:
if (cpu_freq >= idle_top_freq) {
balanced_state = UP;
queue_delayed_work(
balanced_wq, &balanced_work, up_delay);
start_load_timer();
} else if (cpu_freq <= idle_bottom_freq) {
balanced_state = DOWN;
queue_delayed_work(
balanced_wq, &balanced_work,
down_delay);
start_load_timer();
}
break;
case DOWN:
if (cpu_freq >= idle_top_freq) {
balanced_state = UP;
queue_delayed_work(
balanced_wq, &balanced_work, up_delay);
start_load_timer();
}
break;
case UP:
if (cpu_freq <= idle_bottom_freq) {
balanced_state = DOWN;
queue_delayed_work(balanced_wq,
&balanced_work, up_delay);
start_load_timer();
}
break;
default:
pr_err("%s: invalid cpuquiet balanced governor "
"state %d\n", __func__, balanced_state);
}
}
return NOTIFY_OK;
}
static struct notifier_block balanced_cpufreq_nb = {
.notifier_call = balanced_cpufreq_transition,
};
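
/* sysfs store callback: the user writes msec; the stored value is jiffies */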
static void delay_callback(struct cpuquiet_attribute *attr)
{
unsigned long val;
if (attr) {
val = (*((unsigned long *)(attr->param)));
(*((unsigned long *)(attr->param))) = msecs_to_jiffies(val);
}
}
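
/* sysfs store callback: select an rt profile; out-of-range writes revert */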
static void core_bias_callback(struct cpuquiet_attribute *attr)
{
	unsigned int val;

	if (attr) {
		val = *((unsigned int *)(attr->param));
		if (val < ARRAY_SIZE(rt_profiles))
			rt_profile_sel = val;
		else
			/* revert the change: profile index out of range */
			core_bias = rt_profile_sel;
	}
}
CPQ_BASIC_ATTRIBUTE(balance_level, 0644, uint);
CPQ_BASIC_ATTRIBUTE(idle_bottom_freq, 0644, uint);
CPQ_BASIC_ATTRIBUTE(idle_top_freq, 0644, uint);
CPQ_BASIC_ATTRIBUTE(load_sample_rate, 0644, uint);
CPQ_ATTRIBUTE(core_bias, 0644, uint, core_bias_callback);
CPQ_ATTRIBUTE(up_delay, 0644, ulong, delay_callback);
CPQ_ATTRIBUTE(down_delay, 0644, ulong, delay_callback);
static struct attribute *balanced_attributes[] = {
&balance_level_attr.attr,
&idle_bottom_freq_attr.attr,
&idle_top_freq_attr.attr,
&up_delay_attr.attr,
&down_delay_attr.attr,
&load_sample_rate_attr.attr,
&core_bias_attr.attr,
NULL,
};
static const struct sysfs_ops balanced_sysfs_ops = {
.show = cpuquiet_auto_sysfs_show,
.store = cpuquiet_auto_sysfs_store,
};
static struct kobj_type ktype_balanced = {
.sysfs_ops = &balanced_sysfs_ops,
.default_attrs = balanced_attributes,
};
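
/* Register the governor's attribute set under the cpuquiet sysfs tree. */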
static int balanced_sysfs(void)
{
int err;
balanced_kobject = kzalloc(sizeof(*balanced_kobject),
GFP_KERNEL);
if (!balanced_kobject)
return -ENOMEM;
err = cpuquiet_kobject_init(balanced_kobject, &ktype_balanced,
"balanced");
if (err)
kfree(balanced_kobject);
return err;
}
static void balanced_stop(void)
{
	/*
	 * First unregister the notifier. This ensures the governor state
	 * can't be modified by a cpufreq transition.
	 */
cpufreq_unregister_notifier(&balanced_cpufreq_nb,
CPUFREQ_TRANSITION_NOTIFIER);
/* now we can force the governor to be idle */
balanced_state = IDLE;
cancel_delayed_work_sync(&balanced_work);
destroy_workqueue(balanced_wq);
del_timer(&load_timer);
kobject_put(balanced_kobject);
}
static int balanced_start(void)
{
int err, count;
struct cpufreq_frequency_table *table;
struct cpufreq_freqs initial_freq;
	err = balanced_sysfs();
	if (err)
		return err;

	balanced_wq = alloc_workqueue("cpuquiet-balanced",
			WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1);
	if (!balanced_wq) {
		err = -ENOMEM;
		goto err_sysfs;
	}
	INIT_DELAYED_WORK(&balanced_work, balanced_work_func);

	up_delay = msecs_to_jiffies(100);
	down_delay = msecs_to_jiffies(2000);

	table = cpufreq_frequency_get_table(0);
	if (!table) {
		err = -EINVAL;
		goto err_wq;
	}
	/* count the table entries; need at least 4 to pick the thresholds */
	for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++)
		;
	if (count < 4) {
		err = -EINVAL;
		goto err_wq;
	}
	idle_top_freq = table[(count / 2) - 1].frequency;
	idle_bottom_freq = table[(count / 2) - 2].frequency;
cpufreq_register_notifier(&balanced_cpufreq_nb,
CPUFREQ_TRANSITION_NOTIFIER);
init_timer(&load_timer);
load_timer.function = calculate_load_timer;
	/* FIXME: kick-start the state machine by faking a freq notification */
initial_freq.new = cpufreq_get(0);
if (initial_freq.new != 0)
balanced_cpufreq_transition(NULL, CPUFREQ_RESUMECHANGE,
&initial_freq);
	return 0;

err_wq:
	destroy_workqueue(balanced_wq);
err_sysfs:
	kobject_put(balanced_kobject);
	return err;
}
struct cpuquiet_governor balanced_governor = {
.name = "balanced",
.start = balanced_start,
.stop = balanced_stop,
.owner = THIS_MODULE,
};
static int __init init_balanced(void)
{
return cpuquiet_register_governor(&balanced_governor);
}
static void __exit exit_balanced(void)
{
cpuquiet_unregister_governor(&balanced_governor);
}
MODULE_LICENSE("GPL");
#ifdef CONFIG_CPUQUIET_DEFAULT_GOV_BALANCED
fs_initcall(init_balanced);
#else
module_init(init_balanced);
#endif
module_exit(exit_balanced);