diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 12fff3c1b914..9614f75e4857 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3231,6 +3231,19 @@ static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq) } #endif +/* + * Synchronize entity load avg of dequeued entity without locking + * the previous rq. + */ +void sync_entity_load_avg(struct sched_entity *se) +{ + struct cfs_rq *cfs_rq = cfs_rq_of(se); + u64 last_update_time; + + last_update_time = cfs_rq_last_update_time(cfs_rq); + __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL); +} + /* * Task first catches up with cfs_rq, and then subtract * itself from the cfs_rq (task must be off the queue now). @@ -3238,7 +3251,6 @@ static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq) void remove_entity_load_avg(struct sched_entity *se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - u64 last_update_time; /* * tasks cannot exit without having gone through wake_up_new_task() -> @@ -3250,9 +3262,7 @@ void remove_entity_load_avg(struct sched_entity *se) * calls this. */ - last_update_time = cfs_rq_last_update_time(cfs_rq); - - __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL); + sync_entity_load_avg(se); atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg); atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg); } @@ -6312,6 +6322,24 @@ static inline int find_best_target(struct task_struct *p, bool boosted, bool pre return target_cpu; } +/* + * cpu_util_wake: Compute cpu utilization with any contributions from + * the waking task p removed. + */ +static int cpu_util_wake(int cpu, struct task_struct *p) +{ + unsigned long util, capacity; + + /* Task has no contribution or is new */ + if (cpu != task_cpu(p) || !p->se.avg.last_update_time) + return cpu_util(cpu); + + capacity = capacity_orig_of(cpu); + util = max_t(long, cpu_rq(cpu)->cfs.avg.util_avg - task_util(p), 0); + + return (util >= capacity) ? capacity : util; +} + /* * Disable WAKE_AFFINE in the case where task @p doesn't fit in the * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu. @@ -6327,6 +6355,10 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) /* Minimum capacity is close to max, no need to abort wake_affine */ if (max_cap - min_cap < max_cap >> 3) return 0; + + /* Bring task utilization in sync with prev_cpu */ + sync_entity_load_avg(&p->se); + return min_cap * 1024 < task_util(p) * capacity_margin; }