Apply utilization clamps to the schedutil frequency request.

schedutil_freq_util() now receives only the CFS utilization (util_cfs),
adds cpu_util_rt(rq) itself and, for FREQUENCY_UTIL requests, passes the
sum through uclamp_util() before the DL utilization is read. The new
uclamp_util() helper in sched.h clamps a value between the rq's
UCLAMP_MIN and UCLAMP_MAX values and is an identity function when
CONFIG_UCLAMP_TASK is not set. The fair and RT scheduling classes set
.uclamp_enabled when CONFIG_UCLAMP_TASK is defined.

NOTE(review): intra-line whitespace (tabs/alignment) was reconstructed
following kernel coding style; verify removed and context lines against
the target tree before applying.

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 4a00ea91469f..d7230a182cf4 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -237,15 +237,16 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
  * based on the task model parameters and gives the minimal utilization
  * required to meet deadlines.
  */
-unsigned long schedutil_freq_util(int cpu, unsigned long util,
+unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
 				  unsigned long max, enum schedutil_type type)
 {
-	unsigned long dl_util, irq;
+	unsigned long dl_util, util, irq;
 	struct rq *rq = cpu_rq(cpu);
 
-	if (sched_feat(SUGOV_RT_MAX_FREQ) && type == FREQUENCY_UTIL &&
-	    rt_rq_is_runnable(&rq->rt))
+	if ((sched_feat(SUGOV_RT_MAX_FREQ) || !IS_BUILTIN(CONFIG_UCLAMP_TASK)) &&
+	    type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) {
 		return max;
+	}
 
 	/*
 	 * Early check to see if IRQ/steal time saturates the CPU, can be
@@ -257,11 +258,21 @@ unsigned long schedutil_freq_util(int cpu, unsigned long util,
 		return max;
 
 	/*
-	 * The function is called with @util defined as the aggregation (the
-	 * sum) of RT and CFS signals, hence leaving the special case of DL
-	 * to be delt with. The exact way of doing things depend on the calling
-	 * context.
+	 * Because the time spent on RT/DL tasks is visible as 'lost' time to
+	 * CFS tasks and we use the same metric to track the effective
+	 * utilization (PELT windows are synchronized) we can directly add them
+	 * to obtain the CPU's actual utilization.
+	 *
+	 * CFS and RT utilization can be boosted or capped, depending on
+	 * utilization clamp constraints requested by currently RUNNABLE
+	 * tasks.
+	 * When there are no CFS RUNNABLE tasks, clamps are released and
+	 * frequency will be gracefully reduced with the utilization decay.
 	 */
+	util = util_cfs + cpu_util_rt(rq);
+	if (type == FREQUENCY_UTIL)
+		util = uclamp_util(rq, util);
+
 	dl_util = cpu_util_dl(rq);
 
 	/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 00b9fbf81039..21072f7ff931 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11219,6 +11219,10 @@ const struct sched_class fair_sched_class = {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	.task_change_group	= task_change_group_fair,
 #endif
+
+#ifdef CONFIG_UCLAMP_TASK
+	.uclamp_enabled		= 1,
+#endif
 };
 
 #ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 150cde3459a8..1780e7399af8 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2409,6 +2409,10 @@ const struct sched_class rt_sched_class = {
 	.switched_to		= switched_to_rt,
 
 	.update_curr		= update_curr_rt,
+
+#ifdef CONFIG_UCLAMP_TASK
+	.uclamp_enabled		= 1,
+#endif
 };
 
 #ifdef CONFIG_RT_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f3f6e1ac6120..a1878686f573 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2308,6 +2308,29 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
 static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
+#ifdef CONFIG_UCLAMP_TASK
+static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
+{
+	unsigned int min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
+	unsigned int max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
+
+	/*
+	 * Since CPU's {min,max}_util clamps are MAX aggregated considering
+	 * RUNNABLE tasks with _different_ clamps, we can end up with an
+	 * inversion (min >= max). Resolve it here by returning min_util.
+	 */
+	if (unlikely(min_util >= max_util))
+		return min_util;
+
+	return clamp(util, min_util, max_util);
+}
+#else /* CONFIG_UCLAMP_TASK */
+static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
+{
+	return util;
+}
+#endif /* CONFIG_UCLAMP_TASK */
+
 #ifdef arch_scale_freq_capacity
 # ifndef arch_scale_freq_invariant
 # define arch_scale_freq_invariant() true