From 889f8be857d8e1a2a8d7bb4a0d7bedcb12c1a401 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Fri, 1 Oct 2021 14:30:27 +0100 Subject: [PATCH] ANDROID: sched: Introducing PELT multiplier The new sysctl sched_pelt_multiplier allows a user to set a clock multiplier x2 or x4 (x1 being the default). This clock multiplier artificially speeds up PELT ramp up/down, similarly to a faster half-life. Indeed, if we write PELT as a first-order filter: y(t) = G * (1 - exp(-t/tau)) Then we can see that multiplying the time by a constant X is the same as dividing the time constant tau by X: y(t) = G * (1 - exp(-(t*X)/tau)) y(t) = G * (1 - exp(-t/(tau/X))) Tau being half-life/ln(2), multiplying the PELT time is the same as dividing the half-life: - x1: 32ms half-life - x2: 16ms half-life - x4: 8ms half-life Internally, a new clock is created: rq->clock_task_mult. It sits in the clock hierarchy between rq->clock_task and rq->clock_pelt. Bug: 177593580 Change-Id: I67e6ca7994bebea22bf75732ee11d2b10e0d6b7e Suggested-by: Morten Rasmussen Signed-off-by: Vincent Donnefort --- include/linux/sched/sysctl.h | 7 +++++++ kernel/sched/fair.c | 6 +++--- kernel/sched/pelt.c | 40 ++++++++++++++++++++++++++++++++++++ kernel/sched/pelt.h | 11 ++++++++-- kernel/sched/sched.h | 9 ++++++++ kernel/sysctl.c | 9 ++++++++ 6 files changed, 77 insertions(+), 5 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 304f431178fd..d5ebdb6b79de 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -75,6 +75,13 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_SMP +extern unsigned int sysctl_sched_pelt_multiplier; + +int sched_pelt_multiplier(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +#endif + #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) 
extern unsigned int sysctl_sched_energy_aware; int sched_energy_aware_handler(struct ctl_table *table, int write, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 037f8a83c02a..8ecdf50451d6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4822,7 +4822,7 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) cfs_rq->throttle_count--; if (!cfs_rq->throttle_count) { - cfs_rq->throttled_clock_task_time += rq_clock_task(rq) - + cfs_rq->throttled_clock_task_time += rq_clock_task_mult(rq) - cfs_rq->throttled_clock_task; /* Add cfs_rq with load or one or more already running entities to the list */ @@ -4840,7 +4840,7 @@ static int tg_throttle_down(struct task_group *tg, void *data) /* group is entering throttled state, stop time */ if (!cfs_rq->throttle_count) { - cfs_rq->throttled_clock_task = rq_clock_task(rq); + cfs_rq->throttled_clock_task = rq_clock_task_mult(rq); list_del_leaf_cfs_rq(cfs_rq); } cfs_rq->throttle_count++; @@ -5284,7 +5284,7 @@ static void sync_throttle(struct task_group *tg, int cpu) pcfs_rq = tg->parent->cfs_rq[cpu]; cfs_rq->throttle_count = pcfs_rq->throttle_count; - cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu)); + cfs_rq->throttled_clock_task = rq_clock_task_mult(cpu_rq(cpu)); } /* conditionally throttle active cfs_rq's from put_prev_entity() */ diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index 3734bc6d33a4..a9e381a71c12 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -472,3 +472,43 @@ int update_irq_load_avg(struct rq *rq, u64 running) return ret; } #endif + +unsigned int sysctl_sched_pelt_multiplier = 1; +__read_mostly unsigned int sched_pelt_lshift; + +int sched_pelt_multiplier(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + static DEFINE_MUTEX(mutex); + unsigned int old; + int ret; + + mutex_lock(&mutex); + + old = sysctl_sched_pelt_multiplier; + ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (ret) + goto undo; + if 
(!write) + goto done; + + switch (sysctl_sched_pelt_multiplier) { + case 1: + fallthrough; + case 2: + fallthrough; + case 4: + WRITE_ONCE(sched_pelt_lshift, + sysctl_sched_pelt_multiplier >> 1); + goto done; + default: + ret = -EINVAL; + } + +undo: + sysctl_sched_pelt_multiplier = old; +done: + mutex_unlock(&mutex); + + return ret; +} diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index e06071bf3472..7276ca99ecda 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -59,6 +59,8 @@ static inline void cfs_se_util_change(struct sched_avg *avg) WRITE_ONCE(avg->util_est.enqueued, enqueued); } +extern unsigned int sched_pelt_lshift; + /* * The clock_pelt scales the time to reflect the effective amount of * computation done during the running delta time but then sync back to @@ -73,9 +75,13 @@ static inline void cfs_se_util_change(struct sched_avg *avg) */ static inline void update_rq_clock_pelt(struct rq *rq, s64 delta) { + delta <<= READ_ONCE(sched_pelt_lshift); + + rq->clock_task_mult += delta; + if (unlikely(is_idle_task(rq->curr))) { /* The rq is idle, we can sync to clock_task */ - rq->clock_pelt = rq_clock_task(rq); + rq->clock_pelt = rq_clock_task_mult(rq); return; } @@ -127,7 +133,8 @@ static inline void update_idle_rq_clock_pelt(struct rq *rq) * rq's clock_task. 
*/ if (util_sum >= divider) - rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt; + rq->lost_idle_time += rq_clock_task_mult(rq) - + rq->clock_pelt; } static inline u64 rq_clock_pelt(struct rq *rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 040402c141c0..28a89f7d0e5e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -992,6 +992,7 @@ struct rq { u64 clock; /* Ensure that all clocks are in the same cache line */ u64 clock_task ____cacheline_aligned; + u64 clock_task_mult; u64 clock_pelt; unsigned long lost_idle_time; @@ -1490,6 +1491,14 @@ static inline u64 rq_clock_task(struct rq *rq) return rq->clock_task; } +static inline u64 rq_clock_task_mult(struct rq *rq) +{ + lockdep_assert_rq_held(rq); + assert_clock_updated(rq); + + return rq->clock_task_mult; +} + /** * By default the decay is the default pelt decay period. * The decay shift can change the decay period in diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 083be6af29d7..68e61f45b785 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1846,6 +1846,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sched_rr_handler, }, +#ifdef CONFIG_SMP + { + .procname = "sched_pelt_multiplier", + .data = &sysctl_sched_pelt_multiplier, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sched_pelt_multiplier, + }, +#endif #ifdef CONFIG_UCLAMP_TASK { .procname = "sched_util_clamp_min",