From 4ee45c385faa8822557f049c33512dcf2d972a3d Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Tue, 12 Apr 2022 16:51:20 +0800 Subject: [PATCH] soc: rockchip: rockchip_performance: optimize tasks schedule policy When level==0 (low-performance mode): 1. prefer prev_cpu for rt tasks if prev_cpu fits. 2. make sure that at least 6% of the energy is saved when migrating tasks from little cpu to big cpu. When level==2 (high-performance mode): 1. do not use EAS path. 2. select big cpu first when system is not overutilized. 3. do not trigger load_balance() when system is not overutilized. 4. prefer prev_cpu for rt tasks if prev_cpu fits. Test performance improvement for level==2: 1. CONFIG_ROCKCHIP_PERFORMANCE=n EMMC Random Write(4KB) 25.44MB/s Antutu: Total 581266 CPU 133023 GPU 234106 MEM 103602 UX 110535 2. CONFIG_ROCKCHIP_PERFORMANCE=y and level==2 EMMC Random Write(4KB) 44.19MB/s (73.7% improvement) Antutu: Total 600483 (3.3% improvement) CPU 134481 (1.1% improvement) GPU 234678 MEM 116551 (12.5% improvement) UX 114773 (3.8% improvement) Change-Id: I949ac229864eb12159b886b7769e0b489345bef4 Signed-off-by: Liang Chen --- drivers/soc/rockchip/rockchip_performance.c | 26 ++++++++- include/soc/rockchip/rockchip_performance.h | 12 +++- kernel/sched/fair.c | 65 ++++++++++++++++++++- 3 files changed, 99 insertions(+), 4 deletions(-) diff --git a/drivers/soc/rockchip/rockchip_performance.c b/drivers/soc/rockchip/rockchip_performance.c index 005bf7b1b578..e8df14647b47 100644 --- a/drivers/soc/rockchip/rockchip_performance.c +++ b/drivers/soc/rockchip/rockchip_performance.c @@ -124,9 +124,26 @@ int rockchip_perf_get_level(void) return perf_level; } +struct cpumask *rockchip_perf_get_cpul_mask(void) +{ + if (static_branch_unlikely(&sched_asym_cpucapacity)) + return cpul_mask; + + return NULL; +} + +struct cpumask *rockchip_perf_get_cpub_mask(void) +{ + if (static_branch_unlikely(&sched_asym_cpucapacity)) + return cpub_mask; + + return NULL; +} + #ifdef CONFIG_SMP int 
rockchip_perf_select_rt_cpu(int prev_cpu, struct cpumask *lowest_mask) { + struct cpumask target_mask; int cpu = nr_cpu_ids; if (!perf_init_done) @@ -134,9 +151,14 @@ int rockchip_perf_select_rt_cpu(int prev_cpu, struct cpumask *lowest_mask) if (static_branch_unlikely(&sched_asym_cpucapacity)) { if (perf_level == 0) - cpu = cpumask_first_and(lowest_mask, cpul_mask); + cpumask_and(&target_mask, lowest_mask, cpul_mask); if (perf_level == 2) - cpu = cpumask_first_and(lowest_mask, cpub_mask); + cpumask_and(&target_mask, lowest_mask, cpub_mask); + + if (cpumask_test_cpu(prev_cpu, &target_mask)) + return prev_cpu; + + cpu = cpumask_first(&target_mask); if (cpu < nr_cpu_ids) return cpu; diff --git a/include/soc/rockchip/rockchip_performance.h b/include/soc/rockchip/rockchip_performance.h index 629a4c01a82c..c080fb65c7a5 100644 --- a/include/soc/rockchip/rockchip_performance.h +++ b/include/soc/rockchip/rockchip_performance.h @@ -5,13 +5,23 @@ #ifndef __SOC_ROCKCHIP_PERFORMANCE_H #define __SOC_ROCKCHIP_PERFORMANCE_H +enum { + ROCKCHIP_PERFORMANCE_LOW = 0, + ROCKCHIP_PERFORMANCE_NORMAL, + ROCKCHIP_PERFORMANCE_HIGH +}; + #ifdef CONFIG_ROCKCHIP_PERFORMANCE extern int rockchip_perf_get_level(void); +extern struct cpumask *rockchip_perf_get_cpul_mask(void); +extern struct cpumask *rockchip_perf_get_cpub_mask(void); extern int rockchip_perf_select_rt_cpu(int prev_cpu, struct cpumask *lowest_mask); extern bool rockchip_perf_misfit_rt(int cpu); extern void rockchip_perf_uclamp_sync_util_min_rt_default(void); #else -static inline int rockchip_perf_get_level(void) { return 1; } +static inline int rockchip_perf_get_level(void) { return ROCKCHIP_PERFORMANCE_NORMAL; } +static inline struct cpumask *rockchip_perf_get_cpul_mask(void) { return NULL; }; +static inline struct cpumask *rockchip_perf_get_cpub_mask(void) { return NULL; }; static inline int rockchip_perf_select_rt_cpu(int prev_cpu, struct cpumask *lowest_mask) { return prev_cpu; diff --git a/kernel/sched/fair.c 
b/kernel/sched/fair.c index acdad507b044..6411a62c18d7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6347,6 +6347,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) return recent_used_cpu; } + if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) { + if (rockchip_perf_get_level() == ROCKCHIP_PERFORMANCE_HIGH) + goto sd_llc; + } + /* * For asymmetric CPU capacity systems, our domain of interest is * sd_asym_cpucapacity rather than sd_llc. @@ -6367,6 +6372,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) } } +sd_llc: sd = rcu_dereference(per_cpu(sd_llc, target)); if (!sd) return target; @@ -6808,9 +6814,22 @@ unlock: if (prev_delta == ULONG_MAX) return best_energy_cpu; + /* + * When ROCKCHIP_PERFORMANCE_LOW is selected: + * Pick best_energy_cpu immediately if prev_cpu is big cpu and + * best_energy_cpu is little cpu, so that tasks can migrate from + * big cpu to little cpu more easily to save power. + */ if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) { - if (rockchip_perf_get_level() == 0) + struct cpumask *cpul_mask = rockchip_perf_get_cpul_mask(); + struct cpumask *cpub_mask = rockchip_perf_get_cpub_mask(); + int level = rockchip_perf_get_level(); + + if ((level == ROCKCHIP_PERFORMANCE_LOW) && cpul_mask && + cpub_mask && cpumask_test_cpu(prev_cpu, cpub_mask) && + cpumask_test_cpu(best_energy_cpu, cpul_mask)) { return best_energy_cpu; + } } if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4)) @@ -6857,6 +6876,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); + if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) { + if (rockchip_perf_get_level() == ROCKCHIP_PERFORMANCE_HIGH) + goto no_eas; + } + if (sched_energy_enabled()) { new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync); if (new_cpu >= 0) @@ -6864,6 +6888,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f new_cpu = 
prev_cpu; } +no_eas: want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr); } @@ -6896,6 +6921,23 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); + if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) { + struct root_domain *rd = cpu_rq(cpu)->rd; + struct cpumask *cpul_mask = rockchip_perf_get_cpul_mask(); + struct cpumask *cpub_mask = rockchip_perf_get_cpub_mask(); + int level = rockchip_perf_get_level(); + + if ((level == ROCKCHIP_PERFORMANCE_HIGH) && !READ_ONCE(rd->overutilized) && + cpul_mask && cpub_mask && cpumask_intersects(p->cpus_ptr, cpub_mask) && + cpumask_test_cpu(new_cpu, cpul_mask)) { + for_each_domain(cpu, tmp) { + sd = tmp; + } + if (sd) + new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag); + } + } + if (want_affine) current->recent_used_cpu = cpu; } @@ -8975,6 +9017,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) do { int local_group; + if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) { + struct root_domain *rd = cpu_rq(this_cpu)->rd; + struct cpumask *cpub_mask = rockchip_perf_get_cpub_mask(); + int level = rockchip_perf_get_level(); + + if ((level == ROCKCHIP_PERFORMANCE_HIGH) && !READ_ONCE(rd->overutilized) && + cpub_mask && cpumask_intersects(p->cpus_ptr, cpub_mask) && + !cpumask_intersects(sched_group_span(group), cpub_mask)) + continue; + } + /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_span(group), p->cpus_ptr)) @@ -9701,6 +9754,16 @@ static int should_we_balance(struct lb_env *env) struct sched_group *sg = env->sd->groups; int cpu; + if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) { + struct root_domain *rd = env->dst_rq->rd; + struct cpumask *cpul_mask = rockchip_perf_get_cpul_mask(); + int level = rockchip_perf_get_level(); + + if ((level == ROCKCHIP_PERFORMANCE_HIGH) && !READ_ONCE(rd->overutilized) && + cpul_mask && cpumask_test_cpu(env->dst_cpu, cpul_mask)) + return 
0; + } + /* * Ensure the balancing environment is consistent; can happen * when the softirq triggers 'during' hotplug.