mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-07 11:26:02 +09:00
Merge branch 'eas-dev' into android-mainline
Merge android-specific EAS patches Bug: 120440300 Change-Id: I9e9c3dff41f1e941e57390db971d58a1457c69a3 Signed-off-by: Todd Kjos <tkjos@google.com>
This commit is contained in:
@@ -30,6 +30,9 @@ const struct cpumask *cpu_coregroup_mask(int cpu);
|
||||
/* Replace task scheduler's default frequency-invariant accounting */
|
||||
#define arch_scale_freq_capacity topology_get_freq_scale
|
||||
|
||||
/* Replace task scheduler's default max-frequency-invariant accounting */
|
||||
#define arch_scale_max_freq_capacity topology_get_max_freq_scale
|
||||
|
||||
/* Replace task scheduler's default cpu-invariant accounting */
|
||||
#define arch_scale_cpu_capacity topology_get_cpu_scale
|
||||
|
||||
|
||||
@@ -71,6 +71,7 @@ CONFIG_COMPAT=y
|
||||
CONFIG_RANDOMIZE_BASE=y
|
||||
CONFIG_HIBERNATION=y
|
||||
CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
|
||||
CONFIG_ENERGY_MODEL=y
|
||||
CONFIG_ARM_CPUIDLE=y
|
||||
CONFIG_CPU_FREQ=y
|
||||
CONFIG_CPU_FREQ_STAT=y
|
||||
|
||||
@@ -42,6 +42,9 @@ int pcibus_to_node(struct pci_bus *bus);
|
||||
/* Replace task scheduler's default frequency-invariant accounting */
|
||||
#define arch_scale_freq_capacity topology_get_freq_scale
|
||||
|
||||
/* Replace task scheduler's default max-frequency-invariant accounting */
|
||||
#define arch_scale_max_freq_capacity topology_get_max_freq_scale
|
||||
|
||||
/* Replace task scheduler's default cpu-invariant accounting */
|
||||
#define arch_scale_cpu_capacity topology_get_cpu_scale
|
||||
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
#include <linux/cpuset.h>
|
||||
|
||||
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
|
||||
DEFINE_PER_CPU(unsigned long, max_cpu_freq);
|
||||
DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
|
||||
|
||||
void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
|
||||
unsigned long max_freq)
|
||||
@@ -26,8 +28,29 @@ void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
|
||||
|
||||
scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
|
||||
|
||||
for_each_cpu(i, cpus)
|
||||
for_each_cpu(i, cpus) {
|
||||
per_cpu(freq_scale, i) = scale;
|
||||
per_cpu(max_cpu_freq, i) = max_freq;
|
||||
}
|
||||
}
|
||||
|
||||
void arch_set_max_freq_scale(struct cpumask *cpus,
|
||||
unsigned long policy_max_freq)
|
||||
{
|
||||
unsigned long scale, max_freq;
|
||||
int cpu = cpumask_first(cpus);
|
||||
|
||||
if (cpu > nr_cpu_ids)
|
||||
return;
|
||||
|
||||
max_freq = per_cpu(max_cpu_freq, cpu);
|
||||
if (!max_freq)
|
||||
return;
|
||||
|
||||
scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq;
|
||||
|
||||
for_each_cpu(cpu, cpus)
|
||||
per_cpu(max_freq_scale, cpu) = scale;
|
||||
}
|
||||
|
||||
DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
|
||||
|
||||
@@ -153,6 +153,12 @@ __weak void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(arch_set_freq_scale);
|
||||
|
||||
/*
 * Weak default: does nothing with the policy maximum frequency. Being
 * __weak, a non-weak definition of the same symbol replaces it at link time.
 */
__weak void arch_set_max_freq_scale(struct cpumask *cpus,
				    unsigned long policy_max_freq)
{
	/* Intentionally empty. */
}
|
||||
EXPORT_SYMBOL_GPL(arch_set_max_freq_scale);
|
||||
|
||||
/*
|
||||
* This is a generic cpufreq init() routine which can be used by cpufreq
|
||||
* drivers of SMP systems. It will do following:
|
||||
@@ -2416,6 +2422,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
|
||||
policy->max = new_policy->max;
|
||||
trace_cpu_frequency_limits(policy);
|
||||
|
||||
arch_set_max_freq_scale(policy->cpus, policy->max);
|
||||
|
||||
policy->cached_target_freq = UINT_MAX;
|
||||
|
||||
pr_debug("new min and max freqs are %u - %u kHz\n",
|
||||
|
||||
@@ -144,6 +144,7 @@ config THERMAL_GOV_USER_SPACE
|
||||
|
||||
config THERMAL_GOV_POWER_ALLOCATOR
|
||||
bool "Power allocator thermal governor"
|
||||
depends on ENERGY_MODEL
|
||||
help
|
||||
Enable this to manage platform thermals by dynamically
|
||||
allocating and limiting power to devices.
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cpu_cooling.h>
|
||||
#include <linux/energy_model.h>
|
||||
|
||||
#include <trace/events/thermal.h>
|
||||
|
||||
@@ -36,19 +37,6 @@
|
||||
* ...
|
||||
*/
|
||||
|
||||
/**
|
||||
* struct freq_table - frequency table along with power entries
|
||||
* @frequency: frequency in KHz
|
||||
* @power: power in mW
|
||||
*
|
||||
* This structure is built when the cooling device registers and helps
|
||||
* in translating frequency to power and vice versa.
|
||||
*/
|
||||
struct freq_table {
|
||||
u32 frequency;
|
||||
u32 power;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct time_in_idle - Idle time stats
|
||||
* @time: previous reading of the absolute time that this cpu was idle
|
||||
@@ -70,7 +58,7 @@ struct time_in_idle {
|
||||
* frequency.
|
||||
* @max_level: maximum cooling level. One less than total number of valid
|
||||
* cpufreq frequencies.
|
||||
* @freq_table: Freq table in descending order of frequencies
|
||||
* @em: Reference on the Energy Model of the device
|
||||
* @cdev: thermal_cooling_device pointer to keep track of the
|
||||
* registered cooling device.
|
||||
* @policy: cpufreq policy.
|
||||
@@ -86,7 +74,7 @@ struct cpufreq_cooling_device {
|
||||
unsigned int cpufreq_state;
|
||||
unsigned int clipped_freq;
|
||||
unsigned int max_level;
|
||||
struct freq_table *freq_table; /* In descending order */
|
||||
struct em_perf_domain *em;
|
||||
struct cpufreq_policy *policy;
|
||||
struct list_head node;
|
||||
struct time_in_idle *idle_time;
|
||||
@@ -96,28 +84,6 @@ static DEFINE_IDA(cpufreq_ida);
|
||||
static DEFINE_MUTEX(cooling_list_lock);
|
||||
static LIST_HEAD(cpufreq_cdev_list);
|
||||
|
||||
/* Below code defines functions to be used for cpufreq as cooling device */
|
||||
|
||||
/**
|
||||
* get_level: Find the level for a particular frequency
|
||||
* @cpufreq_cdev: cpufreq_cdev for which the property is required
|
||||
* @freq: Frequency
|
||||
*
|
||||
* Return: level corresponding to the frequency.
|
||||
*/
|
||||
static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
|
||||
unsigned int freq)
|
||||
{
|
||||
struct freq_table *freq_table = cpufreq_cdev->freq_table;
|
||||
unsigned long level;
|
||||
|
||||
for (level = 1; level <= cpufreq_cdev->max_level; level++)
|
||||
if (freq > freq_table[level].frequency)
|
||||
break;
|
||||
|
||||
return level - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
|
||||
* @nb: struct notifier_block * with callback info.
|
||||
@@ -171,104 +137,51 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
|
||||
/**
|
||||
* update_freq_table() - Update the freq table with power numbers
|
||||
* @cpufreq_cdev: the cpufreq cooling device in which to update the table
|
||||
* @capacitance: dynamic power coefficient for these cpus
|
||||
* get_level: Find the level for a particular frequency
|
||||
* @cpufreq_cdev: cpufreq_cdev for which the property is required
|
||||
* @freq: Frequency
|
||||
*
|
||||
* Update the freq table with power numbers. This table will be used in
|
||||
* cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
|
||||
* frequency efficiently. Power is stored in mW, frequency in KHz. The
|
||||
* resulting table is in descending order.
|
||||
*
|
||||
* Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
|
||||
* or -ENOMEM if we run out of memory.
|
||||
* Return: level corresponding to the frequency.
|
||||
*/
|
||||
static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
|
||||
u32 capacitance)
|
||||
static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
|
||||
unsigned int freq)
|
||||
{
|
||||
struct freq_table *freq_table = cpufreq_cdev->freq_table;
|
||||
struct dev_pm_opp *opp;
|
||||
struct device *dev = NULL;
|
||||
int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;
|
||||
int i;
|
||||
|
||||
dev = get_cpu_device(cpu);
|
||||
if (unlikely(!dev)) {
|
||||
pr_warn("No cpu device for cpu %d\n", cpu);
|
||||
return -ENODEV;
|
||||
for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
|
||||
if (freq > cpufreq_cdev->em->table[i].frequency)
|
||||
break;
|
||||
}
|
||||
|
||||
num_opps = dev_pm_opp_get_opp_count(dev);
|
||||
if (num_opps < 0)
|
||||
return num_opps;
|
||||
|
||||
/*
|
||||
* The cpufreq table is also built from the OPP table and so the count
|
||||
* should match.
|
||||
*/
|
||||
if (num_opps != cpufreq_cdev->max_level + 1) {
|
||||
dev_warn(dev, "Number of OPPs not matching with max_levels\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (i = 0; i <= cpufreq_cdev->max_level; i++) {
|
||||
unsigned long freq = freq_table[i].frequency * 1000;
|
||||
u32 freq_mhz = freq_table[i].frequency / 1000;
|
||||
u64 power;
|
||||
u32 voltage_mv;
|
||||
|
||||
/*
|
||||
* Find ceil frequency as 'freq' may be slightly lower than OPP
|
||||
* freq due to truncation while converting to kHz.
|
||||
*/
|
||||
opp = dev_pm_opp_find_freq_ceil(dev, &freq);
|
||||
if (IS_ERR(opp)) {
|
||||
dev_err(dev, "failed to get opp for %lu frequency\n",
|
||||
freq);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
|
||||
dev_pm_opp_put(opp);
|
||||
|
||||
/*
|
||||
* Do the multiplication with MHz and millivolt so as
|
||||
* to not overflow.
|
||||
*/
|
||||
power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
|
||||
do_div(power, 1000000000);
|
||||
|
||||
/* power is stored in mW */
|
||||
freq_table[i].power = power;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return cpufreq_cdev->max_level - i - 1;
|
||||
}
|
||||
|
||||
static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
|
||||
u32 freq)
|
||||
{
|
||||
int i;
|
||||
struct freq_table *freq_table = cpufreq_cdev->freq_table;
|
||||
|
||||
for (i = 1; i <= cpufreq_cdev->max_level; i++)
|
||||
if (freq > freq_table[i].frequency)
|
||||
for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
|
||||
if (freq > cpufreq_cdev->em->table[i].frequency)
|
||||
break;
|
||||
}
|
||||
|
||||
return freq_table[i - 1].power;
|
||||
return cpufreq_cdev->em->table[i + 1].power;
|
||||
}
|
||||
|
||||
static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
|
||||
u32 power)
|
||||
{
|
||||
int i;
|
||||
struct freq_table *freq_table = cpufreq_cdev->freq_table;
|
||||
|
||||
for (i = 1; i <= cpufreq_cdev->max_level; i++)
|
||||
if (power > freq_table[i].power)
|
||||
for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
|
||||
if (power > cpufreq_cdev->em->table[i].power)
|
||||
break;
|
||||
}
|
||||
|
||||
return freq_table[i - 1].frequency;
|
||||
return cpufreq_cdev->em->table[i + 1].frequency;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -319,80 +232,6 @@ static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
|
||||
return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
|
||||
}
|
||||
|
||||
/* cpufreq cooling device callback functions are defined below */
|
||||
|
||||
/**
|
||||
* cpufreq_get_max_state - callback function to get the max cooling state.
|
||||
* @cdev: thermal cooling device pointer.
|
||||
* @state: fill this variable with the max cooling state.
|
||||
*
|
||||
* Callback for the thermal cooling device to return the cpufreq
|
||||
* max cooling state.
|
||||
*
|
||||
* Return: 0 on success, an error code otherwise.
|
||||
*/
|
||||
static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
|
||||
unsigned long *state)
|
||||
{
|
||||
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
|
||||
|
||||
*state = cpufreq_cdev->max_level;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpufreq_get_cur_state - callback function to get the current cooling state.
|
||||
* @cdev: thermal cooling device pointer.
|
||||
* @state: fill this variable with the current cooling state.
|
||||
*
|
||||
* Callback for the thermal cooling device to return the cpufreq
|
||||
* current cooling state.
|
||||
*
|
||||
* Return: 0 on success, an error code otherwise.
|
||||
*/
|
||||
static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
|
||||
unsigned long *state)
|
||||
{
|
||||
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
|
||||
|
||||
*state = cpufreq_cdev->cpufreq_state;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpufreq_set_cur_state - callback function to set the current cooling state.
|
||||
* @cdev: thermal cooling device pointer.
|
||||
* @state: set this variable to the current cooling state.
|
||||
*
|
||||
* Callback for the thermal cooling device to change the cpufreq
|
||||
* current cooling state.
|
||||
*
|
||||
* Return: 0 on success, an error code otherwise.
|
||||
*/
|
||||
static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
|
||||
unsigned long state)
|
||||
{
|
||||
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
|
||||
unsigned int clip_freq;
|
||||
|
||||
/* Request state should be less than max_level */
|
||||
if (WARN_ON(state > cpufreq_cdev->max_level))
|
||||
return -EINVAL;
|
||||
|
||||
/* Check if the old cooling action is same as new cooling action */
|
||||
if (cpufreq_cdev->cpufreq_state == state)
|
||||
return 0;
|
||||
|
||||
clip_freq = cpufreq_cdev->freq_table[state].frequency;
|
||||
cpufreq_cdev->cpufreq_state = state;
|
||||
cpufreq_cdev->clipped_freq = clip_freq;
|
||||
|
||||
cpufreq_update_policy(cpufreq_cdev->policy->cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpufreq_get_requested_power() - get the current power
|
||||
* @cdev: &thermal_cooling_device pointer
|
||||
@@ -483,7 +322,7 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
|
||||
struct thermal_zone_device *tz,
|
||||
unsigned long state, u32 *power)
|
||||
{
|
||||
unsigned int freq, num_cpus;
|
||||
unsigned int freq, num_cpus, idx;
|
||||
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
|
||||
|
||||
/* Request state should be less than max_level */
|
||||
@@ -492,7 +331,8 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
|
||||
|
||||
num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
|
||||
|
||||
freq = cpufreq_cdev->freq_table[state].frequency;
|
||||
idx = cpufreq_cdev->max_level - state;
|
||||
freq = cpufreq_cdev->em->table[idx].frequency;
|
||||
*power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
|
||||
|
||||
return 0;
|
||||
@@ -537,21 +377,139 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
|
||||
struct em_perf_domain *em) {
|
||||
struct cpufreq_policy *policy;
|
||||
unsigned int nr_levels;
|
||||
|
||||
if (!em)
|
||||
return false;
|
||||
|
||||
policy = cpufreq_cdev->policy;
|
||||
if (!cpumask_equal(policy->related_cpus, to_cpumask(em->cpus))) {
|
||||
pr_err("The span of pd %*pbl is misaligned with cpufreq policy %*pbl\n",
|
||||
cpumask_pr_args(to_cpumask(em->cpus)),
|
||||
cpumask_pr_args(policy->related_cpus));
|
||||
return false;
|
||||
}
|
||||
|
||||
nr_levels = cpufreq_cdev->max_level + 1;
|
||||
if (em->nr_cap_states != nr_levels) {
|
||||
pr_err("The number of cap states in pd %*pbl (%u) doesn't match the number of cooling levels (%u)\n",
|
||||
cpumask_pr_args(to_cpumask(em->cpus)),
|
||||
em->nr_cap_states, nr_levels);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */
|
||||
|
||||
static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev,
|
||||
unsigned long state)
|
||||
{
|
||||
struct cpufreq_policy *policy;
|
||||
unsigned long idx;
|
||||
|
||||
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
|
||||
/* Use the Energy Model table if available */
|
||||
if (cpufreq_cdev->em) {
|
||||
idx = cpufreq_cdev->max_level - state;
|
||||
return cpufreq_cdev->em->table[idx].frequency;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Otherwise, fallback on the CPUFreq table */
|
||||
policy = cpufreq_cdev->policy;
|
||||
if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
|
||||
idx = cpufreq_cdev->max_level - state;
|
||||
else
|
||||
idx = state;
|
||||
|
||||
return policy->freq_table[idx].frequency;
|
||||
}
|
||||
|
||||
|
||||
/* cpufreq cooling device callback functions are defined below */
|
||||
|
||||
/**
|
||||
* cpufreq_get_max_state - callback function to get the max cooling state.
|
||||
* @cdev: thermal cooling device pointer.
|
||||
* @state: fill this variable with the max cooling state.
|
||||
*
|
||||
* Callback for the thermal cooling device to return the cpufreq
|
||||
* max cooling state.
|
||||
*
|
||||
* Return: 0 on success, an error code otherwise.
|
||||
*/
|
||||
static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
|
||||
unsigned long *state)
|
||||
{
|
||||
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
|
||||
|
||||
*state = cpufreq_cdev->max_level;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpufreq_get_cur_state - callback function to get the current cooling state.
|
||||
* @cdev: thermal cooling device pointer.
|
||||
* @state: fill this variable with the current cooling state.
|
||||
*
|
||||
* Callback for the thermal cooling device to return the cpufreq
|
||||
* current cooling state.
|
||||
*
|
||||
* Return: 0 on success, an error code otherwise.
|
||||
*/
|
||||
static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
|
||||
unsigned long *state)
|
||||
{
|
||||
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
|
||||
|
||||
*state = cpufreq_cdev->cpufreq_state;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpufreq_set_cur_state - callback function to set the current cooling state.
|
||||
* @cdev: thermal cooling device pointer.
|
||||
* @state: set this variable to the current cooling state.
|
||||
*
|
||||
* Callback for the thermal cooling device to change the cpufreq
|
||||
* current cooling state.
|
||||
*
|
||||
* Return: 0 on success, an error code otherwise.
|
||||
*/
|
||||
static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
|
||||
unsigned long state)
|
||||
{
|
||||
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
|
||||
unsigned int clip_freq;
|
||||
|
||||
/* Request state should be less than max_level */
|
||||
if (WARN_ON(state > cpufreq_cdev->max_level))
|
||||
return -EINVAL;
|
||||
|
||||
/* Check if the old cooling action is same as new cooling action */
|
||||
if (cpufreq_cdev->cpufreq_state == state)
|
||||
return 0;
|
||||
|
||||
clip_freq = get_state_freq(cpufreq_cdev, state);
|
||||
cpufreq_cdev->cpufreq_state = state;
|
||||
cpufreq_cdev->clipped_freq = clip_freq;
|
||||
|
||||
cpufreq_update_policy(cpufreq_cdev->policy->cpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Bind cpufreq callbacks to thermal cooling device ops */
|
||||
|
||||
static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
|
||||
.get_max_state = cpufreq_get_max_state,
|
||||
.get_cur_state = cpufreq_get_cur_state,
|
||||
.set_cur_state = cpufreq_set_cur_state,
|
||||
};
|
||||
|
||||
static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
|
||||
.get_max_state = cpufreq_get_max_state,
|
||||
.get_cur_state = cpufreq_get_cur_state,
|
||||
.set_cur_state = cpufreq_set_cur_state,
|
||||
.get_requested_power = cpufreq_get_requested_power,
|
||||
.state2power = cpufreq_state2power,
|
||||
.power2state = cpufreq_power2state,
|
||||
};
|
||||
|
||||
/* Notifier for cpufreq policy change */
|
||||
@@ -559,26 +517,12 @@ static struct notifier_block thermal_cpufreq_notifier_block = {
|
||||
.notifier_call = cpufreq_thermal_notifier,
|
||||
};
|
||||
|
||||
static unsigned int find_next_max(struct cpufreq_frequency_table *table,
|
||||
unsigned int prev_max)
|
||||
{
|
||||
struct cpufreq_frequency_table *pos;
|
||||
unsigned int max = 0;
|
||||
|
||||
cpufreq_for_each_valid_entry(pos, table) {
|
||||
if (pos->frequency > max && pos->frequency < prev_max)
|
||||
max = pos->frequency;
|
||||
}
|
||||
|
||||
return max;
|
||||
}
|
||||
|
||||
/**
|
||||
* __cpufreq_cooling_register - helper function to create cpufreq cooling device
|
||||
* @np: a valid struct device_node to the cooling device device tree node
|
||||
* @policy: cpufreq policy
|
||||
* Normally this should be same as cpufreq policy->related_cpus.
|
||||
* @capacitance: dynamic power coefficient for these cpus
|
||||
* @em: Energy Model of the cpufreq policy
|
||||
*
|
||||
* This interface function registers the cpufreq cooling device with the name
|
||||
* "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
|
||||
@@ -590,12 +534,13 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
|
||||
*/
|
||||
static struct thermal_cooling_device *
|
||||
__cpufreq_cooling_register(struct device_node *np,
|
||||
struct cpufreq_policy *policy, u32 capacitance)
|
||||
struct cpufreq_policy *policy,
|
||||
struct em_perf_domain *em)
|
||||
{
|
||||
struct thermal_cooling_device *cdev;
|
||||
struct cpufreq_cooling_device *cpufreq_cdev;
|
||||
char dev_name[THERMAL_NAME_LENGTH];
|
||||
unsigned int freq, i, num_cpus;
|
||||
unsigned int i, num_cpus;
|
||||
int ret;
|
||||
struct thermal_cooling_device_ops *cooling_ops;
|
||||
bool first;
|
||||
@@ -629,46 +574,30 @@ __cpufreq_cooling_register(struct device_node *np,
|
||||
/* max_level is an index, not a counter */
|
||||
cpufreq_cdev->max_level = i - 1;
|
||||
|
||||
cpufreq_cdev->freq_table = kmalloc_array(i,
|
||||
sizeof(*cpufreq_cdev->freq_table),
|
||||
GFP_KERNEL);
|
||||
if (!cpufreq_cdev->freq_table) {
|
||||
cdev = ERR_PTR(-ENOMEM);
|
||||
goto free_idle_time;
|
||||
}
|
||||
|
||||
ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
|
||||
if (ret < 0) {
|
||||
cdev = ERR_PTR(ret);
|
||||
goto free_table;
|
||||
goto free_idle_time;
|
||||
}
|
||||
cpufreq_cdev->id = ret;
|
||||
|
||||
snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
|
||||
cpufreq_cdev->id);
|
||||
|
||||
/* Fill freq-table in descending order of frequencies */
|
||||
for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
|
||||
freq = find_next_max(policy->freq_table, freq);
|
||||
cpufreq_cdev->freq_table[i].frequency = freq;
|
||||
|
||||
/* Warn for duplicate entries */
|
||||
if (!freq)
|
||||
pr_warn("%s: table has duplicate entries\n", __func__);
|
||||
else
|
||||
pr_debug("%s: freq:%u KHz\n", __func__, freq);
|
||||
}
|
||||
|
||||
if (capacitance) {
|
||||
ret = update_freq_table(cpufreq_cdev, capacitance);
|
||||
if (ret) {
|
||||
cdev = ERR_PTR(ret);
|
||||
goto remove_ida;
|
||||
}
|
||||
|
||||
cooling_ops = &cpufreq_power_cooling_ops;
|
||||
} else {
|
||||
cooling_ops = &cpufreq_cooling_ops;
|
||||
cooling_ops = &cpufreq_cooling_ops;
|
||||
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
|
||||
if (em_is_sane(cpufreq_cdev, em)) {
|
||||
cpufreq_cdev->em = em;
|
||||
cooling_ops->get_requested_power = cpufreq_get_requested_power;
|
||||
cooling_ops->state2power = cpufreq_state2power;
|
||||
cooling_ops->power2state = cpufreq_power2state;
|
||||
} else
|
||||
#endif
|
||||
if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED) {
|
||||
pr_err("%s: unsorted frequency tables are not supported\n",
|
||||
__func__);
|
||||
cdev = ERR_PTR(-EINVAL);
|
||||
goto remove_ida;
|
||||
}
|
||||
|
||||
cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
|
||||
@@ -676,7 +605,7 @@ __cpufreq_cooling_register(struct device_node *np,
|
||||
if (IS_ERR(cdev))
|
||||
goto remove_ida;
|
||||
|
||||
cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;
|
||||
cpufreq_cdev->clipped_freq = get_state_freq(cpufreq_cdev, 0);
|
||||
|
||||
mutex_lock(&cooling_list_lock);
|
||||
/* Register the notifier for first cpufreq cooling device */
|
||||
@@ -692,8 +621,6 @@ __cpufreq_cooling_register(struct device_node *np,
|
||||
|
||||
remove_ida:
|
||||
ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
|
||||
free_table:
|
||||
kfree(cpufreq_cdev->freq_table);
|
||||
free_idle_time:
|
||||
kfree(cpufreq_cdev->idle_time);
|
||||
free_cdev:
|
||||
@@ -715,7 +642,7 @@ free_cdev:
|
||||
struct thermal_cooling_device *
|
||||
cpufreq_cooling_register(struct cpufreq_policy *policy)
|
||||
{
|
||||
return __cpufreq_cooling_register(NULL, policy, 0);
|
||||
return __cpufreq_cooling_register(NULL, policy, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
|
||||
|
||||
@@ -743,7 +670,6 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
|
||||
{
|
||||
struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
|
||||
struct thermal_cooling_device *cdev = NULL;
|
||||
u32 capacitance = 0;
|
||||
|
||||
if (!np) {
|
||||
pr_err("cpu_cooling: OF node not available for cpu%d\n",
|
||||
@@ -752,10 +678,9 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
|
||||
}
|
||||
|
||||
if (of_find_property(np, "#cooling-cells", NULL)) {
|
||||
of_property_read_u32(np, "dynamic-power-coefficient",
|
||||
&capacitance);
|
||||
struct em_perf_domain *em = em_cpu_get(policy->cpu);
|
||||
|
||||
cdev = __cpufreq_cooling_register(np, policy, capacitance);
|
||||
cdev = __cpufreq_cooling_register(np, policy, em);
|
||||
if (IS_ERR(cdev)) {
|
||||
pr_err("cpu_cooling: cpu%d failed to register as cooling device: %ld\n",
|
||||
policy->cpu, PTR_ERR(cdev));
|
||||
@@ -797,7 +722,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
|
||||
thermal_cooling_device_unregister(cdev);
|
||||
ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
|
||||
kfree(cpufreq_cdev->idle_time);
|
||||
kfree(cpufreq_cdev->freq_table);
|
||||
kfree(cpufreq_cdev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
|
||||
|
||||
@@ -33,4 +33,12 @@ unsigned long topology_get_freq_scale(int cpu)
|
||||
return per_cpu(freq_scale, cpu);
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(unsigned long, max_freq_scale);
|
||||
|
||||
static inline
|
||||
unsigned long topology_get_max_freq_scale(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
return per_cpu(max_freq_scale, cpu);
|
||||
}
|
||||
|
||||
#endif /* _LINUX_ARCH_TOPOLOGY_H_ */
|
||||
|
||||
@@ -984,6 +984,8 @@ extern unsigned int arch_freq_get_on_cpu(int cpu);
|
||||
|
||||
extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
|
||||
unsigned long max_freq);
|
||||
extern void arch_set_max_freq_scale(struct cpumask *cpus,
|
||||
unsigned long policy_max_freq);
|
||||
|
||||
/* the following are really really optional */
|
||||
extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
|
||||
|
||||
@@ -6226,7 +6226,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
|
||||
return 0;
|
||||
|
||||
min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
|
||||
max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
|
||||
max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
|
||||
|
||||
/* Minimum capacity is close to max, no need to abort wake_affine */
|
||||
if (max_cap - min_cap < max_cap >> 3)
|
||||
@@ -7141,6 +7141,7 @@ struct lb_env {
|
||||
int new_dst_cpu;
|
||||
enum cpu_idle_type idle;
|
||||
long imbalance;
|
||||
unsigned int src_grp_nr_running;
|
||||
/* The set of CPUs under consideration for load-balancing */
|
||||
struct cpumask *cpus;
|
||||
|
||||
@@ -7754,10 +7755,9 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
|
||||
};
|
||||
}
|
||||
|
||||
static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
|
||||
static unsigned long scale_rt_capacity(int cpu, unsigned long max)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long max = arch_scale_cpu_capacity(cpu);
|
||||
unsigned long used, free;
|
||||
unsigned long irq;
|
||||
|
||||
@@ -7777,12 +7777,47 @@ static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
|
||||
return scale_irq_capacity(free, irq, max);
|
||||
}
|
||||
|
||||
void init_max_cpu_capacity(struct max_cpu_capacity *mcc) {
|
||||
raw_spin_lock_init(&mcc->lock);
|
||||
mcc->val = 0;
|
||||
mcc->cpu = -1;
|
||||
}
|
||||
|
||||
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
unsigned long capacity = scale_rt_capacity(sd, cpu);
|
||||
unsigned long capacity = arch_scale_cpu_capacity(cpu);
|
||||
struct sched_group *sdg = sd->groups;
|
||||
struct max_cpu_capacity *mcc;
|
||||
unsigned long max_capacity;
|
||||
int max_cap_cpu;
|
||||
unsigned long flags;
|
||||
|
||||
cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
|
||||
cpu_rq(cpu)->cpu_capacity_orig = capacity;
|
||||
|
||||
capacity *= arch_scale_max_freq_capacity(sd, cpu);
|
||||
capacity >>= SCHED_CAPACITY_SHIFT;
|
||||
|
||||
mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
|
||||
|
||||
raw_spin_lock_irqsave(&mcc->lock, flags);
|
||||
max_capacity = mcc->val;
|
||||
max_cap_cpu = mcc->cpu;
|
||||
|
||||
if ((max_capacity > capacity && max_cap_cpu == cpu) ||
|
||||
(max_capacity < capacity)) {
|
||||
mcc->val = capacity;
|
||||
mcc->cpu = cpu;
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
raw_spin_unlock_irqrestore(&mcc->lock, flags);
|
||||
printk_deferred(KERN_INFO "CPU%d: update max cpu_capacity %lu\n",
|
||||
cpu, capacity);
|
||||
goto skip_unlock;
|
||||
#endif
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&mcc->lock, flags);
|
||||
|
||||
skip_unlock: __attribute__ ((unused));
|
||||
capacity = scale_rt_capacity(cpu, capacity);
|
||||
|
||||
if (!capacity)
|
||||
capacity = 1;
|
||||
@@ -7886,7 +7921,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
|
||||
static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
|
||||
{
|
||||
return rq->misfit_task_load &&
|
||||
(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
|
||||
(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity.val ||
|
||||
check_cpu_capacity(rq, sd));
|
||||
}
|
||||
|
||||
@@ -8291,6 +8326,8 @@ next_group:
|
||||
if (env->sd->flags & SD_NUMA)
|
||||
env->fbq_type = fbq_classify_group(&sds->busiest_stat);
|
||||
|
||||
env->src_grp_nr_running = sds->busiest_stat.sum_nr_running;
|
||||
|
||||
if (!env->sd->parent) {
|
||||
struct root_domain *rd = env->dst_rq->rd;
|
||||
|
||||
@@ -8419,7 +8456,22 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
|
||||
capa_move /= SCHED_CAPACITY_SCALE;
|
||||
|
||||
/* Move if we gain throughput */
|
||||
if (capa_move > capa_now)
|
||||
if (capa_move > capa_now) {
|
||||
env->imbalance = busiest->load_per_task;
|
||||
return;
|
||||
}
|
||||
|
||||
/* We can't see throughput improvement with the load-based
|
||||
* method, but it is possible depending upon group size and
|
||||
* capacity range that there might still be an underutilized
|
||||
* cpu available in an asymmetric capacity system. Do one last
|
||||
* check just in case.
|
||||
*/
|
||||
if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
|
||||
busiest->group_type == group_overloaded &&
|
||||
busiest->sum_nr_running > busiest->group_weight &&
|
||||
local->sum_nr_running < local->group_weight &&
|
||||
local->group_capacity < busiest->group_capacity)
|
||||
env->imbalance = busiest->load_per_task;
|
||||
}
|
||||
|
||||
@@ -8488,8 +8540,18 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
(sds->avg_load - local->avg_load) * local->group_capacity
|
||||
) / SCHED_CAPACITY_SCALE;
|
||||
|
||||
/* Boost imbalance to allow misfit task to be balanced. */
|
||||
if (busiest->group_type == group_misfit_task) {
|
||||
/* Boost imbalance to allow misfit task to be balanced.
|
||||
* Always do this if we are doing a NEWLY_IDLE balance
|
||||
* on the assumption that any tasks we have must not be
|
||||
* long-running (and hence we cannot rely upon load).
|
||||
* However if we are not idle, we should assume the tasks
|
||||
* we have are longer running and not override load-based
|
||||
* calculations above unless we are sure that the local
|
||||
* group is underutilized.
|
||||
*/
|
||||
if (busiest->group_type == group_misfit_task &&
|
||||
(env->idle == CPU_NEWLY_IDLE ||
|
||||
local->sum_nr_running < local->group_weight)) {
|
||||
env->imbalance = max_t(long, env->imbalance,
|
||||
busiest->group_misfit_task_load);
|
||||
}
|
||||
@@ -8767,6 +8829,17 @@ static int need_active_balance(struct lb_env *env)
|
||||
if (voluntary_active_balance(env))
|
||||
return 1;
|
||||
|
||||
if ((capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) &&
|
||||
env->src_rq->cfs.h_nr_running == 1 &&
|
||||
cpu_overutilized(env->src_cpu) &&
|
||||
!cpu_overutilized(env->dst_cpu)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (env->src_grp_type == group_overloaded && env->src_rq->misfit_task_load)
|
||||
return 1;
|
||||
|
||||
|
||||
return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
|
||||
}
|
||||
|
||||
@@ -8985,7 +9058,8 @@ more_balance:
|
||||
* excessive cache_hot migrations and active balances.
|
||||
*/
|
||||
if (idle != CPU_NEWLY_IDLE)
|
||||
sd->nr_balance_failed++;
|
||||
if (env.src_grp_nr_running > 1)
|
||||
sd->nr_balance_failed++;
|
||||
|
||||
if (need_active_balance(&env)) {
|
||||
unsigned long flags;
|
||||
|
||||
@@ -710,6 +710,12 @@ struct perf_domain {
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
/*
 * Record of the maximum CPU capacity, embedded in struct root_domain as
 * its max_cpu_capacity member and set up via init_max_cpu_capacity().
 */
struct max_cpu_capacity {
|
||||
raw_spinlock_t lock;	/* released with raw_spin_unlock_irqrestore() by the capacity update code */
|
||||
unsigned long val;	/* capacity value; compared against rq->cpu_capacity_orig */
|
||||
int cpu;	/* presumably the CPU that supplied @val - TODO confirm against the updater */
|
||||
};
|
||||
|
||||
/* Scheduling group status flags */
|
||||
#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */
|
||||
#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
|
||||
@@ -768,7 +774,8 @@ struct root_domain {
|
||||
cpumask_var_t rto_mask;
|
||||
struct cpupri cpupri;
|
||||
|
||||
unsigned long max_cpu_capacity;
|
||||
/* Maximum cpu capacity in the system. */
|
||||
struct max_cpu_capacity max_cpu_capacity;
|
||||
|
||||
/*
|
||||
* NULL-terminated list of performance domains intersecting with the
|
||||
@@ -781,6 +788,7 @@ extern struct root_domain def_root_domain;
|
||||
extern struct mutex sched_domains_mutex;
|
||||
|
||||
extern void init_defrootdomain(void);
|
||||
extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
|
||||
extern int sched_init_domains(const struct cpumask *cpu_map);
|
||||
extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
|
||||
extern void sched_get_rd(struct root_domain *rd);
|
||||
@@ -1942,6 +1950,15 @@ unsigned long arch_scale_freq_capacity(int cpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Fallback for architectures that do not #define
 * arch_scale_max_freq_capacity themselves: it ignores both @sd and @cpu
 * and always returns SCHED_CAPACITY_SCALE.
 */
#ifndef arch_scale_max_freq_capacity
|
||||
struct sched_domain;
|
||||
static __always_inline
|
||||
unsigned long arch_scale_max_freq_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
return SCHED_CAPACITY_SCALE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#ifdef CONFIG_PREEMPT
|
||||
|
||||
|
||||
@@ -510,6 +510,9 @@ static int init_rootdomain(struct root_domain *rd)
|
||||
|
||||
if (cpupri_init(&rd->cpupri) != 0)
|
||||
goto free_cpudl;
|
||||
|
||||
init_max_cpu_capacity(&rd->max_cpu_capacity);
|
||||
|
||||
return 0;
|
||||
|
||||
free_cpudl:
|
||||
@@ -1930,7 +1933,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
|
||||
enum s_alloc alloc_state;
|
||||
struct sched_domain *sd;
|
||||
struct s_data d;
|
||||
struct rq *rq = NULL;
|
||||
int i, ret = -ENOMEM;
|
||||
struct sched_domain_topology_level *tl_asym;
|
||||
bool has_asym = false;
|
||||
@@ -1993,13 +1995,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
|
||||
/* Attach the domains */
|
||||
rcu_read_lock();
|
||||
for_each_cpu(i, cpu_map) {
|
||||
rq = cpu_rq(i);
|
||||
sd = *per_cpu_ptr(d.sd, i);
|
||||
|
||||
/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
|
||||
if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
|
||||
WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
|
||||
|
||||
cpu_attach_domain(sd, d.rd, i);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
@@ -2007,11 +2003,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
|
||||
if (has_asym)
|
||||
static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
|
||||
|
||||
if (rq && sched_debug_enabled) {
|
||||
pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
|
||||
cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
error:
|
||||
__free_domain_allocs(&d, alloc_state, cpu_map);
|
||||
|
||||
Reference in New Issue
Block a user