mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-09 20:32:04 +09:00
cpuquiet: Update averaging of nr_runnables
Computing an exponential moving average on every nr_running++/-- does not guarantee a fixed sample rate, which induces errors if lots of threads are being enqueued on/dequeued from the rq (Linpack mt). Instead of keeping track of the avg, the scheduler now keeps track of the integral of nr_running and allows the readers to perform filtering on top. Implemented a proper exponential moving average for the runnables governor and a straight 100ms average for the balanced governor. Tweaked the thresholds for the runnables governor to minimize latency. Also, decreased sample_rate for the runnables governor to the absolute minimum of 10msecs. Updated to K3.4 Change-Id: Ia25bf8baf2a1a015ba188b2c06e551e89b16c5f8 Signed-off-by: Sai Charan Gurrappadi <sgurrappadi@nvidia.com> Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com> Reviewed-on: http://git-master/r/131147 Reviewed-by: Juha Tukkinen <jtukkinen@nvidia.com>
This commit is contained in:
committed by
Huang, Tao
parent
14d64ba7d1
commit
f0b3996549
@@ -204,6 +204,54 @@ static unsigned int *rt_profiles[] = {
|
||||
static unsigned int nr_run_hysteresis = 2; /* 0.5 thread */
|
||||
static unsigned int nr_run_last;
|
||||
|
||||
/*
 * Per-CPU bookkeeping for get_avg_nr_runnables(): remembers the last
 * nr_running_integral() reading and when it was taken, so the next call can
 * turn the growth of the integral into an average over the elapsed window.
 */
struct runnables_avg_sample {
|
||||
/* nr_running_integral() value read when this CPU was last sampled */
u64 previous_integral;
|
||||
/* average computed for the most recent sample window of this CPU */
unsigned int avg;
|
||||
/* false until the first sample has recorded a baseline */
bool integral_sampled;
|
||||
/* ktime_get() in nanoseconds at the moment of the last sample */
u64 prev_timestamp;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);
|
||||
|
||||
static unsigned int get_avg_nr_runnables(void)
|
||||
{
|
||||
unsigned int i, sum = 0;
|
||||
struct runnables_avg_sample *sample;
|
||||
u64 integral, old_integral, delta_integral, delta_time, cur_time;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
sample = &per_cpu(avg_nr_sample, i);
|
||||
integral = nr_running_integral(i);
|
||||
old_integral = sample->previous_integral;
|
||||
sample->previous_integral = integral;
|
||||
cur_time = ktime_to_ns(ktime_get());
|
||||
delta_time = cur_time - sample->prev_timestamp;
|
||||
sample->prev_timestamp = cur_time;
|
||||
|
||||
if (!sample->integral_sampled) {
|
||||
sample->integral_sampled = true;
|
||||
/* First sample to initialize prev_integral, skip
|
||||
* avg calculation
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
if (integral < old_integral) {
|
||||
/* Overflow */
|
||||
delta_integral = (ULLONG_MAX - old_integral) + integral;
|
||||
} else {
|
||||
delta_integral = integral - old_integral;
|
||||
}
|
||||
|
||||
/* Calculate average for the previous sample window */
|
||||
do_div(delta_integral, delta_time);
|
||||
sample->avg = delta_integral;
|
||||
sum += sample->avg;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
static CPU_SPEED_BALANCE balanced_speed_balance(void)
|
||||
{
|
||||
unsigned long highest_speed = cpu_highest_speed();
|
||||
@@ -211,7 +259,7 @@ static CPU_SPEED_BALANCE balanced_speed_balance(void)
|
||||
unsigned long skewed_speed = balanced_speed / 2;
|
||||
unsigned int nr_cpus = num_online_cpus();
|
||||
unsigned int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
|
||||
unsigned int avg_nr_run = avg_nr_running();
|
||||
unsigned int avg_nr_run = get_avg_nr_runnables();
|
||||
unsigned int nr_run;
|
||||
unsigned int *current_profile = rt_profiles[rt_profile_sel];
|
||||
|
||||
|
||||
@@ -33,14 +33,15 @@ typedef enum {
|
||||
UP,
|
||||
} RUNNABLES_STATE;
|
||||
|
||||
static struct delayed_work runnables_work;
|
||||
static struct work_struct runnables_work;
|
||||
static struct kobject *runnables_kobject;
|
||||
|
||||
/* configurable parameters */
|
||||
static unsigned int sample_rate = 20; /* msec */
|
||||
static struct timer_list runnables_timer;
|
||||
|
||||
static RUNNABLES_STATE runnables_state;
|
||||
/* configurable parameters */
|
||||
static unsigned int sample_rate = 10; /* msec */
|
||||
|
||||
static RUNNABLES_STATE runnables_state;
|
||||
static struct workqueue_struct *runnables_wq;
|
||||
|
||||
#define NR_FSHIFT_EXP 3
|
||||
#define NR_FSHIFT (1 << NR_FSHIFT_EXP)
|
||||
@@ -56,25 +57,73 @@ static unsigned int nr_run_thresholds[NR_CPUS];
|
||||
|
||||
DEFINE_MUTEX(runnables_work_lock);
|
||||
|
||||
static void update_runnables_state(void)
|
||||
/*
 * Per-CPU sampling state for the runnables governor. get_avg_nr_runnables()
 * updates it on every sample and get_lightest_loaded_cpu_n() reads ->avg to
 * pick a CPU.
 */
struct runnables_avg_sample {
|
||||
/* nr_running_integral() value read when this CPU was last sampled */
u64 previous_integral;
|
||||
/* average computed for the most recent sample window of this CPU */
unsigned int avg;
|
||||
/* false until the first sample has recorded a baseline */
bool integral_sampled;
|
||||
/* ktime_get() in nanoseconds at the moment of the last sample */
u64 prev_timestamp;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct runnables_avg_sample, avg_nr_sample);
|
||||
|
||||
/* EXP = alpha in the exponential moving average.
|
||||
* Alpha = e ^ (-sample_rate / window_size) * FIXED_1
|
||||
* Calculated for sample_rate of 10ms, window size of 63.82ms
|
||||
*/
|
||||
#define EXP 1751
|
||||
|
||||
static unsigned int get_avg_nr_runnables(void)
|
||||
{
|
||||
unsigned int i, sum = 0;
|
||||
static unsigned int avg;
|
||||
struct runnables_avg_sample *sample;
|
||||
u64 integral, old_integral, delta_integral, delta_time, cur_time;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
sample = &per_cpu(avg_nr_sample, i);
|
||||
integral = nr_running_integral(i);
|
||||
old_integral = sample->previous_integral;
|
||||
sample->previous_integral = integral;
|
||||
cur_time = ktime_to_ns(ktime_get());
|
||||
delta_time = cur_time - sample->prev_timestamp;
|
||||
sample->prev_timestamp = cur_time;
|
||||
|
||||
if (!sample->integral_sampled) {
|
||||
sample->integral_sampled = true;
|
||||
/* First sample to initialize prev_integral, skip
|
||||
* avg calculation
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
if (integral < old_integral) {
|
||||
/* Overflow */
|
||||
delta_integral = (ULLONG_MAX - old_integral) + integral;
|
||||
} else {
|
||||
delta_integral = integral - old_integral;
|
||||
}
|
||||
|
||||
/* Calculate average for the previous sample window */
|
||||
do_div(delta_integral, delta_time);
|
||||
sample->avg = delta_integral;
|
||||
sum += sample->avg;
|
||||
}
|
||||
|
||||
/* Exponential moving average
|
||||
* Avgn = Avgn-1 * alpha + new_avg * (1 - alpha)
|
||||
*/
|
||||
avg *= EXP;
|
||||
avg += sum * (FIXED_1 - EXP);
|
||||
avg >>= FSHIFT;
|
||||
|
||||
return avg;
|
||||
}
|
||||
|
||||
static void update_runnables_state(unsigned int nr_run)
|
||||
{
|
||||
unsigned int nr_cpus = num_online_cpus();
|
||||
int max_cpus = pm_qos_request(PM_QOS_MAX_ONLINE_CPUS) ? : 4;
|
||||
int min_cpus = pm_qos_request(PM_QOS_MIN_ONLINE_CPUS);
|
||||
unsigned int avg_nr_run = avg_nr_running();
|
||||
unsigned int nr_run;
|
||||
|
||||
if (runnables_state == DISABLED)
|
||||
return;
|
||||
|
||||
for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) {
|
||||
unsigned int nr_threshold = nr_run_thresholds[nr_run - 1];
|
||||
if (nr_run_last <= nr_run)
|
||||
nr_threshold += NR_FSHIFT / nr_run_hysteresis;
|
||||
if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT_EXP)))
|
||||
break;
|
||||
}
|
||||
nr_run_last = nr_run;
|
||||
|
||||
if ((nr_cpus > max_cpus || nr_run < nr_cpus) && nr_cpus >= min_cpus) {
|
||||
runnables_state = DOWN;
|
||||
@@ -85,6 +134,31 @@ static void update_runnables_state(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void runnables_avg_sampler(unsigned long data)
|
||||
{
|
||||
unsigned int nr_run, avg_nr_run;
|
||||
|
||||
if (runnables_state == DISABLED)
|
||||
return;
|
||||
|
||||
avg_nr_run = get_avg_nr_runnables();
|
||||
mod_timer(&runnables_timer, jiffies + msecs_to_jiffies(sample_rate));
|
||||
|
||||
for (nr_run = 1; nr_run < ARRAY_SIZE(nr_run_thresholds); nr_run++) {
|
||||
unsigned int nr_threshold = nr_run_thresholds[nr_run - 1];
|
||||
if (nr_run_last <= nr_run)
|
||||
nr_threshold += NR_FSHIFT / nr_run_hysteresis;
|
||||
if (avg_nr_run <= (nr_threshold << (FSHIFT - NR_FSHIFT_EXP)))
|
||||
break;
|
||||
}
|
||||
|
||||
nr_run_last = nr_run;
|
||||
update_runnables_state(nr_run);
|
||||
|
||||
if (runnables_state != DISABLED && runnables_state != IDLE)
|
||||
schedule_work(&runnables_work);
|
||||
}
|
||||
|
||||
static unsigned int get_lightest_loaded_cpu_n(void)
|
||||
{
|
||||
unsigned long min_avg_runnables = ULONG_MAX;
|
||||
@@ -92,8 +166,8 @@ static unsigned int get_lightest_loaded_cpu_n(void)
|
||||
int i;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
unsigned int nr_runnables = get_avg_nr_running(i);
|
||||
|
||||
struct runnables_avg_sample *s = &per_cpu(avg_nr_sample, i);
|
||||
unsigned int nr_runnables = s->avg;
|
||||
if (i > 0 && min_avg_runnables > nr_runnables) {
|
||||
cpu = i;
|
||||
min_avg_runnables = nr_runnables;
|
||||
@@ -106,27 +180,23 @@ static unsigned int get_lightest_loaded_cpu_n(void)
|
||||
static void runnables_work_func(struct work_struct *work)
|
||||
{
|
||||
bool up = false;
|
||||
bool sample = false;
|
||||
unsigned int cpu = nr_cpu_ids;
|
||||
|
||||
mutex_lock(&runnables_work_lock);
|
||||
|
||||
update_runnables_state();
|
||||
/* Update state to avoid duplicate operations */
|
||||
update_runnables_state(nr_run_last);
|
||||
|
||||
switch (runnables_state) {
|
||||
case DISABLED:
|
||||
break;
|
||||
case IDLE:
|
||||
sample = true;
|
||||
break;
|
||||
case UP:
|
||||
cpu = cpumask_next_zero(0, cpu_online_mask);
|
||||
up = true;
|
||||
sample = true;
|
||||
break;
|
||||
case DOWN:
|
||||
cpu = get_lightest_loaded_cpu_n();
|
||||
sample = true;
|
||||
break;
|
||||
default:
|
||||
pr_err("%s: invalid cpuquiet runnable governor state %d\n",
|
||||
@@ -134,10 +204,6 @@ static void runnables_work_func(struct work_struct *work)
|
||||
break;
|
||||
}
|
||||
|
||||
if (sample)
|
||||
queue_delayed_work(runnables_wq, &runnables_work,
|
||||
msecs_to_jiffies(sample_rate));
|
||||
|
||||
if (cpu < nr_cpu_ids) {
|
||||
if (up)
|
||||
cpuquiet_wake_cpu(cpu);
|
||||
@@ -190,7 +256,7 @@ static void runnables_device_busy(void)
|
||||
{
|
||||
if (runnables_state != DISABLED) {
|
||||
runnables_state = DISABLED;
|
||||
cancel_delayed_work_sync(&runnables_work);
|
||||
cancel_work_sync(&runnables_work);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -198,15 +264,14 @@ static void runnables_device_free(void)
|
||||
{
|
||||
if (runnables_state == DISABLED) {
|
||||
runnables_state = IDLE;
|
||||
runnables_work_func(NULL);
|
||||
mod_timer(&runnables_timer, jiffies + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * runnables_stop() - shut the governor down.
 *
 * Marks the governor DISABLED, cancels outstanding runnables_work and
 * releases runnables_kobject with kobject_put().
 *
 * NOTE(review): this span comes from a diff hunk and appears to show both
 * the removed delayed-work/workqueue teardown and the added cancel_work_sync()
 * call - confirm which lines belong to the final version.
 * NOTE(review): nothing visible here stops runnables_timer; confirm whether a
 * del_timer_sync() is needed before the work is cancelled.
 */
static void runnables_stop(void)
|
||||
{
|
||||
/* Set first so the sampler and state update bail out early. */
runnables_state = DISABLED;
|
||||
cancel_delayed_work_sync(&runnables_work);
|
||||
destroy_workqueue(runnables_wq);
|
||||
cancel_work_sync(&runnables_work);
|
||||
kobject_put(runnables_kobject);
|
||||
}
|
||||
|
||||
@@ -218,12 +283,10 @@ static int runnables_start(void)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
runnables_wq = alloc_workqueue("cpuquiet-runnables",
|
||||
WQ_UNBOUND | WQ_RESCUER | WQ_FREEZABLE, 1);
|
||||
if (!runnables_wq)
|
||||
return -ENOMEM;
|
||||
INIT_WORK(&runnables_work, runnables_work_func);
|
||||
|
||||
INIT_DELAYED_WORK(&runnables_work, runnables_work_func);
|
||||
init_timer(&runnables_timer);
|
||||
runnables_timer.function = runnables_avg_sampler;
|
||||
|
||||
for(i = 0; i < ARRAY_SIZE(nr_run_thresholds); ++i) {
|
||||
if (i < ARRAY_SIZE(default_thresholds))
|
||||
@@ -236,7 +299,7 @@ static int runnables_start(void)
|
||||
}
|
||||
|
||||
runnables_state = IDLE;
|
||||
runnables_work_func(NULL);
|
||||
runnables_avg_sampler(0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user