Revert "sched: Make struct sched_statistics independent of fair sched class"

This reverts commit c3b9f95598.

It breaks the ABI and is not needed for now.  It will come in at the
next ABI break so that future releases will be easier to manage.

Bug: 161946584
Change-Id: I46e772dffa5ce4fb291a4cb029528100a773109b
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Greg Kroah-Hartman committed 2023-06-10 08:41:14 +00:00
commit 87f6012910, parent 5390c024c3
8 changed files with 100 additions and 143 deletions
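
The layout mechanics behind the ABI concern: the reverted patch dropped statistics from struct sched_entity and added a stats member to struct task_struct (first file below), shifting the offset of every later member of structures that prebuilt modules were compiled against. A minimal sketch of the failure mode, using simplified stand-in layouts (illustrative names only, not the real task_struct):

	/* Illustrative only -- simplified stand-ins, not kernel code. */
	#include <stddef.h>
	#include <stdio.h>

	struct stats_blob { unsigned long long wait_sum, exec_max; };

	/* Layout the frozen ABI (and prebuilt modules) were built against. */
	struct task_v1 {
		int prio;
		void *preempt_notifiers;
	};

	/* Layout after the reverted patch: a stats block inserted mid-struct. */
	struct task_v2 {
		int prio;
		struct stats_blob stats;	/* shifts every member below it */
		void *preempt_notifiers;
	};

	int main(void)
	{
		/* A module built against v1 bakes this offset into its code... */
		printf("v1 preempt_notifiers @ %zu\n",
		       offsetof(struct task_v1, preempt_notifiers));
		/* ...but a v2 kernel keeps the member at a different offset. */
		printf("v2 preempt_notifiers @ %zu\n",
		       offsetof(struct task_v2, preempt_notifiers));
		return 0;
	}

The two printed offsets differ, so a module that inlined the v1 offset reads the wrong memory on a v2 kernel; deferring the change to the next ABI break avoids that mismatch.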

include/linux/sched.h

@@ -526,7 +526,7 @@ struct sched_statistics {
 	u64 nr_wakeups_passive;
 	u64 nr_wakeups_idle;
 #endif
-} ____cacheline_aligned;
+};

 struct sched_entity {
 	/* For load-balancing: */
@@ -542,6 +542,8 @@ struct sched_entity {
 	u64 nr_migrations;

+	struct sched_statistics statistics;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	int depth;
 	struct sched_entity *parent;
@@ -815,8 +817,6 @@ struct task_struct {
 	struct uclamp_se uclamp[UCLAMP_CNT];
 #endif

-	struct sched_statistics stats;
-
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	/* List of struct preempt_notifier: */
 	struct hlist_head preempt_notifiers;

kernel/sched/core.c

@@ -3614,11 +3614,11 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 #ifdef CONFIG_SMP
 	if (cpu == rq->cpu) {
 		__schedstat_inc(rq->ttwu_local);
-		__schedstat_inc(p->stats.nr_wakeups_local);
+		__schedstat_inc(p->se.statistics.nr_wakeups_local);
 	} else {
 		struct sched_domain *sd;

-		__schedstat_inc(p->stats.nr_wakeups_remote);
+		__schedstat_inc(p->se.statistics.nr_wakeups_remote);
 		rcu_read_lock();
 		for_each_domain(rq->cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
@@ -3630,14 +3630,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 	}

 	if (wake_flags & WF_MIGRATED)
-		__schedstat_inc(p->stats.nr_wakeups_migrate);
+		__schedstat_inc(p->se.statistics.nr_wakeups_migrate);
 #endif /* CONFIG_SMP */

 	__schedstat_inc(rq->ttwu_count);
-	__schedstat_inc(p->stats.nr_wakeups);
+	__schedstat_inc(p->se.statistics.nr_wakeups);

 	if (wake_flags & WF_SYNC)
-		__schedstat_inc(p->stats.nr_wakeups_sync);
+		__schedstat_inc(p->se.statistics.nr_wakeups_sync);
 }

 /*
@@ -4363,7 +4363,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 #ifdef CONFIG_SCHEDSTATS
 	/* Even if schedstat is disabled, there should not be garbage */
-	memset(&p->stats, 0, sizeof(p->stats));
+	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif

 	RB_CLEAR_NODE(&p->dl.rb_node);
@@ -9890,9 +9890,9 @@ void normalize_rt_tasks(void)
 			continue;

 		p->se.exec_start = 0;
-		schedstat_set(p->stats.wait_start, 0);
-		schedstat_set(p->stats.sleep_start, 0);
-		schedstat_set(p->stats.block_start, 0);
+		schedstat_set(p->se.statistics.wait_start, 0);
+		schedstat_set(p->se.statistics.sleep_start, 0);
+		schedstat_set(p->se.statistics.block_start, 0);

 		if (!dl_task(p) && !rt_task(p)) {
 			/*
@@ -10787,14 +10787,11 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
 	seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);

 	if (schedstat_enabled() && tg != &root_task_group) {
-		struct sched_statistics *stats;
 		u64 ws = 0;
 		int i;

-		for_each_possible_cpu(i) {
-			stats = __schedstats_from_se(tg->se[i]);
-			ws += schedstat_val(stats->wait_sum);
-		}
+		for_each_possible_cpu(i)
+			ws += schedstat_val(tg->se[i]->statistics.wait_sum);

 		seq_printf(sf, "wait_sum %llu\n", ws);
 	}

kernel/sched/deadline.c

@@ -1266,8 +1266,8 @@ static void update_curr_dl(struct rq *rq)
 		return;
 	}

-	schedstat_set(curr->stats.exec_max,
-		      max(curr->stats.exec_max, delta_exec));
+	schedstat_set(curr->se.statistics.exec_max,
+		      max(curr->se.statistics.exec_max, delta_exec));

 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);

kernel/sched/debug.c

@@ -450,11 +450,9 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	struct sched_entity *se = tg->se[cpu];

 #define P(F)		SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
-#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n", \
-		#F, (long long)schedstat_val(stats->F))
+#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)schedstat_val(F))
 #define PN(F)		SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", \
-		#F, SPLIT_NS((long long)schedstat_val(stats->F)))
+#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))

 	if (!se)
 		return;
@@ -464,18 +462,16 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	PN(se->sum_exec_runtime);

 	if (schedstat_enabled()) {
-		struct sched_statistics *stats = __schedstats_from_se(se);
-
-		PN_SCHEDSTAT(wait_start);
-		PN_SCHEDSTAT(sleep_start);
-		PN_SCHEDSTAT(block_start);
-		PN_SCHEDSTAT(sleep_max);
-		PN_SCHEDSTAT(block_max);
-		PN_SCHEDSTAT(exec_max);
-		PN_SCHEDSTAT(slice_max);
-		PN_SCHEDSTAT(wait_max);
-		PN_SCHEDSTAT(wait_sum);
-		P_SCHEDSTAT(wait_count);
+		PN_SCHEDSTAT(se->statistics.wait_start);
+		PN_SCHEDSTAT(se->statistics.sleep_start);
+		PN_SCHEDSTAT(se->statistics.block_start);
+		PN_SCHEDSTAT(se->statistics.sleep_max);
+		PN_SCHEDSTAT(se->statistics.block_max);
+		PN_SCHEDSTAT(se->statistics.exec_max);
+		PN_SCHEDSTAT(se->statistics.slice_max);
+		PN_SCHEDSTAT(se->statistics.wait_max);
+		PN_SCHEDSTAT(se->statistics.wait_sum);
+		P_SCHEDSTAT(se->statistics.wait_count);
 	}

 	P(se->load.weight);
@@ -542,9 +538,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		p->prio);

 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));

 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@ -950,8 +946,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 		"---------------------------------------------------------"
 		"----------\n");

-#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->stats.F))
-#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F))
+#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->F))
+#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))

 	PN(se.exec_start);
 	PN(se.vruntime);
@@ -964,33 +960,33 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;

-		PN_SCHEDSTAT(sum_sleep_runtime);
-		PN_SCHEDSTAT(wait_start);
-		PN_SCHEDSTAT(sleep_start);
-		PN_SCHEDSTAT(block_start);
-		PN_SCHEDSTAT(sleep_max);
-		PN_SCHEDSTAT(block_max);
-		PN_SCHEDSTAT(exec_max);
-		PN_SCHEDSTAT(slice_max);
-		PN_SCHEDSTAT(wait_max);
-		PN_SCHEDSTAT(wait_sum);
-		P_SCHEDSTAT(wait_count);
-		PN_SCHEDSTAT(iowait_sum);
-		P_SCHEDSTAT(iowait_count);
-		P_SCHEDSTAT(nr_migrations_cold);
-		P_SCHEDSTAT(nr_failed_migrations_affine);
-		P_SCHEDSTAT(nr_failed_migrations_running);
-		P_SCHEDSTAT(nr_failed_migrations_hot);
-		P_SCHEDSTAT(nr_forced_migrations);
-		P_SCHEDSTAT(nr_wakeups);
-		P_SCHEDSTAT(nr_wakeups_sync);
-		P_SCHEDSTAT(nr_wakeups_migrate);
-		P_SCHEDSTAT(nr_wakeups_local);
-		P_SCHEDSTAT(nr_wakeups_remote);
-		P_SCHEDSTAT(nr_wakeups_affine);
-		P_SCHEDSTAT(nr_wakeups_affine_attempts);
-		P_SCHEDSTAT(nr_wakeups_passive);
-		P_SCHEDSTAT(nr_wakeups_idle);
+		PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
+		PN_SCHEDSTAT(se.statistics.wait_start);
+		PN_SCHEDSTAT(se.statistics.sleep_start);
+		PN_SCHEDSTAT(se.statistics.block_start);
+		PN_SCHEDSTAT(se.statistics.sleep_max);
+		PN_SCHEDSTAT(se.statistics.block_max);
+		PN_SCHEDSTAT(se.statistics.exec_max);
+		PN_SCHEDSTAT(se.statistics.slice_max);
+		PN_SCHEDSTAT(se.statistics.wait_max);
+		PN_SCHEDSTAT(se.statistics.wait_sum);
+		P_SCHEDSTAT(se.statistics.wait_count);
+		PN_SCHEDSTAT(se.statistics.iowait_sum);
+		P_SCHEDSTAT(se.statistics.iowait_count);
+		P_SCHEDSTAT(se.statistics.nr_migrations_cold);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
+		P_SCHEDSTAT(se.statistics.nr_forced_migrations);
+		P_SCHEDSTAT(se.statistics.nr_wakeups);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_local);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_idle);

 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
@@ -1056,7 +1052,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 void proc_sched_set_task(struct task_struct *p)
 {
 #ifdef CONFIG_SCHEDSTATS
-	memset(&p->stats, 0, sizeof(p->stats));
+	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 }

kernel/sched/fair.c

@@ -846,13 +846,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	curr->exec_start = now;

-	if (schedstat_enabled()) {
-		struct sched_statistics *stats;
-
-		stats = __schedstats_from_se(curr);
-		__schedstat_set(stats->exec_max,
-				max(delta_exec, stats->exec_max));
-	}
+	schedstat_set(curr->statistics.exec_max,
+		      max(delta_exec, curr->statistics.exec_max));

 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq->exec_clock, delta_exec);
@@ -880,45 +875,39 @@ static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	u64 wait_start, prev_wait_start;
-	struct sched_statistics *stats;

 	if (!schedstat_enabled())
 		return;

-	stats = __schedstats_from_se(se);
-
 	wait_start = rq_clock(rq_of(cfs_rq));
-	prev_wait_start = schedstat_val(stats->wait_start);
+	prev_wait_start = schedstat_val(se->statistics.wait_start);

 	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
 	    likely(wait_start > prev_wait_start))
 		wait_start -= prev_wait_start;

-	__schedstat_set(stats->wait_start, wait_start);
+	__schedstat_set(se->statistics.wait_start, wait_start);
 }

 static inline void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	struct sched_statistics *stats;
-	struct task_struct *p = NULL;
+	struct task_struct *p;
 	u64 delta;

 	if (!schedstat_enabled())
 		return;

-	stats = __schedstats_from_se(se);
-
 	/*
 	 * When the sched_schedstat changes from 0 to 1, some sched se
 	 * maybe already in the runqueue, the se->statistics.wait_start
 	 * will be 0.So it will let the delta wrong. We need to avoid this
 	 * scenario.
 	 */
-	if (unlikely(!schedstat_val(stats->wait_start)))
+	if (unlikely(!schedstat_val(se->statistics.wait_start)))
 		return;

-	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(stats->wait_start);
+	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);

 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -928,33 +917,30 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			 * time stamp can be adjusted to accumulate wait time
 			 * prior to migration.
 			 */
-			__schedstat_set(stats->wait_start, delta);
+			__schedstat_set(se->statistics.wait_start, delta);
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
 	}

-	__schedstat_set(stats->wait_max,
-			max(schedstat_val(stats->wait_max), delta));
-	__schedstat_inc(stats->wait_count);
-	__schedstat_add(stats->wait_sum, delta);
-	__schedstat_set(stats->wait_start, 0);
+	__schedstat_set(se->statistics.wait_max,
+			max(schedstat_val(se->statistics.wait_max), delta));
+	__schedstat_inc(se->statistics.wait_count);
+	__schedstat_add(se->statistics.wait_sum, delta);
+	__schedstat_set(se->statistics.wait_start, 0);
 }

 static inline void
 update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	struct sched_statistics *stats;
 	struct task_struct *tsk = NULL;
 	u64 sleep_start, block_start;

 	if (!schedstat_enabled())
 		return;

-	stats = __schedstats_from_se(se);
-
-	sleep_start = schedstat_val(stats->sleep_start);
-	block_start = schedstat_val(stats->block_start);
+	sleep_start = schedstat_val(se->statistics.sleep_start);
+	block_start = schedstat_val(se->statistics.block_start);

 	if (entity_is_task(se))
 		tsk = task_of(se);
@@ -965,11 +951,11 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if ((s64)delta < 0)
 			delta = 0;

-		if (unlikely(delta > schedstat_val(stats->sleep_max)))
-			__schedstat_set(stats->sleep_max, delta);
+		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
+			__schedstat_set(se->statistics.sleep_max, delta);

-		__schedstat_set(stats->sleep_start, 0);
-		__schedstat_add(stats->sum_sleep_runtime, delta);
+		__schedstat_set(se->statistics.sleep_start, 0);
+		__schedstat_add(se->statistics.sum_sleep_runtime, delta);

 		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
@@ -982,16 +968,16 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if ((s64)delta < 0)
 			delta = 0;

-		if (unlikely(delta > schedstat_val(stats->block_max)))
-			__schedstat_set(stats->block_max, delta);
+		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
+			__schedstat_set(se->statistics.block_max, delta);

-		__schedstat_set(stats->block_start, 0);
-		__schedstat_add(stats->sum_sleep_runtime, delta);
+		__schedstat_set(se->statistics.block_start, 0);
+		__schedstat_add(se->statistics.sum_sleep_runtime, delta);

 		if (tsk) {
 			if (tsk->in_iowait) {
-				__schedstat_add(stats->iowait_sum, delta);
-				__schedstat_inc(stats->iowait_count);
+				__schedstat_add(se->statistics.iowait_sum, delta);
+				__schedstat_inc(se->statistics.iowait_count);
 				trace_sched_stat_iowait(tsk, delta);
 			}
@@ -1053,10 +1039,10 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		/* XXX racy against TTWU */
 		state = READ_ONCE(tsk->__state);
 		if (state & TASK_INTERRUPTIBLE)
-			__schedstat_set(tsk->stats.sleep_start,
+			__schedstat_set(se->statistics.sleep_start,
 					rq_clock(rq_of(cfs_rq)));
 		if (state & TASK_UNINTERRUPTIBLE)
-			__schedstat_set(tsk->stats.block_start,
+			__schedstat_set(se->statistics.block_start,
 					rq_clock(rq_of(cfs_rq)));
 	}
 }
@@ -4736,11 +4722,8 @@ void set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 */
 	if (schedstat_enabled() &&
 	    rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) {
-		struct sched_statistics *stats;
-
-		stats = __schedstats_from_se(se);
-		__schedstat_set(stats->slice_max,
-				max((u64)stats->slice_max,
-				    se->sum_exec_runtime - se->prev_sum_exec_runtime));
+		__schedstat_set(se->statistics.slice_max,
+			max((u64)se->statistics.slice_max,
+			    se->sum_exec_runtime - se->prev_sum_exec_runtime));
 	}
@@ -6254,12 +6237,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
 		target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);

-	schedstat_inc(p->stats.nr_wakeups_affine_attempts);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
 	if (target != this_cpu)
 		return prev_cpu;

 	schedstat_inc(sd->ttwu_move_affine);
-	schedstat_inc(p->stats.nr_wakeups_affine);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 	return target;
 }
@@ -8181,7 +8164,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
 		int cpu;

-		schedstat_inc(p->stats.nr_failed_migrations_affine);
+		schedstat_inc(p->se.statistics.nr_failed_migrations_affine);

 		env->flags |= LBF_SOME_PINNED;
@@ -8215,7 +8198,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	env->flags &= ~LBF_ALL_PINNED;

 	if (task_running(env->src_rq, p)) {
-		schedstat_inc(p->stats.nr_failed_migrations_running);
+		schedstat_inc(p->se.statistics.nr_failed_migrations_running);
 		return 0;
 	}
@@ -8237,12 +8220,12 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 		if (tsk_cache_hot == 1) {
 			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
-			schedstat_inc(p->stats.nr_forced_migrations);
+			schedstat_inc(p->se.statistics.nr_forced_migrations);
 		}
 		return 1;
 	}

-	schedstat_inc(p->stats.nr_failed_migrations_hot);
+	schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
 	return 0;
 }
@@ -11970,7 +11953,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		if (!cfs_rq)
 			goto err;

-		se = kzalloc_node(sizeof(struct sched_entity_stats),
+		se = kzalloc_node(sizeof(struct sched_entity),
 				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
 			goto err_free_rq;

kernel/sched/rt.c

@@ -1030,8 +1030,8 @@ static void update_curr_rt(struct rq *rq)
 	if (unlikely((s64)delta_exec <= 0))
 		return;

-	schedstat_set(curr->stats.exec_max,
-		      max(curr->stats.exec_max, delta_exec));
+	schedstat_set(curr->se.statistics.exec_max,
+		      max(curr->se.statistics.exec_max, delta_exec));

 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);

kernel/sched/stats.h

@@ -41,7 +41,6 @@ rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
 #define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0)

 #else /* !CONFIG_SCHEDSTATS: */
-
 static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { }
 static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { }
 static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { }
@@ -54,26 +53,8 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt
 # define schedstat_set(var, val) do { } while (0)
 # define schedstat_val(var) 0
 # define schedstat_val_or_zero(var) 0
 #endif /* CONFIG_SCHEDSTATS */

-#ifdef CONFIG_FAIR_GROUP_SCHED
-struct sched_entity_stats {
-	struct sched_entity     se;
-	struct sched_statistics stats;
-} __no_randomize_layout;
-#endif
-
-static inline struct sched_statistics *
-__schedstats_from_se(struct sched_entity *se)
-{
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	if (!entity_is_task(se))
-		return &container_of(se, struct sched_entity_stats, se)->stats;
-#endif
-	return &task_of(se)->stats;
-}
-
 #ifdef CONFIG_PSI
 /*
  * PSI tracks state that persists across sleeps, such as iowaits and

kernel/sched/stop_task.c

@@ -78,8 +78,8 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;

-	schedstat_set(curr->stats.exec_max,
-		      max(curr->stats.exec_max, delta_exec));
+	schedstat_set(curr->se.statistics.exec_max,
+		      max(curr->se.statistics.exec_max, delta_exec));

 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
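
For context on the kernel/sched/stats.h hunk above: the removed __schedstats_from_se() helper recovered a group entity's statistics via the container_of() embedding pattern, relying on group entities being allocated as a sched_entity_stats wrapper (hence the kzalloc_node() change in fair.c). A self-contained sketch of that pattern, with simplified stand-in types and a userspace container_of():

	/* Simplified stand-ins; mirrors the pattern, not the kernel types. */
	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct sched_statistics { unsigned long long wait_sum; };
	struct sched_entity     { int depth; };

	/* Group entities embed their statistics right after the entity. */
	struct sched_entity_stats {
		struct sched_entity     se;
		struct sched_statistics stats;
	};

	int main(void)
	{
		struct sched_entity_stats ses = { .stats = { .wait_sum = 42 } };
		struct sched_entity *se = &ses.se;	/* callers see only this */

		/* Walk back from the embedded member to the wrapper's stats. */
		struct sched_statistics *stats =
			&container_of(se, struct sched_entity_stats, se)->stats;

		printf("wait_sum = %llu\n", stats->wait_sum);	/* prints 42 */
		return 0;
	}

The revert restores the simpler scheme in which every sched_entity carries its own statistics member, so no wrapper or lookup helper is needed.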