diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index 62f30858e7b9..fbdd00a7aabf 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -108,6 +108,64 @@ __schedtune_accept_deltas(int nrg_delta, int cap_delta, /* * EAS scheduler tunables for task groups. + * + * When CGroup support is enabled, we have to synchronize two different + * paths: + * - slow path: where CGroups are created/updated/removed + * - fast path: where tasks in a CGroup are accounted + * + * The slow path tracks (a limited number of) CGroups and maps each on a + * "boost_group" index. The fast path accounts tasks currently RUNNABLE on each + * "boost_group". + * + * Once a new CGroup is created, a boost group idx is assigned and the + * corresponding "boost_group" marked as valid on each CPU. + * Once a CGroup is released, the corresponding "boost_group" is marked as + * invalid on each CPU. The CPU boost value (boost_max) is aggregated by + * considering only valid boost_groups with a non-zero tasks counter. + * + * .:: Locking strategy + * + * The fast path uses a spin lock for each CPU boost_group which protects the + * tasks counter. + * + * The "valid" and "boost" values of each CPU boost_group are instead + * protected by the RCU lock provided by the CGroups callbacks. Thus, only the + * slow path can access and modify the boost_group attributes of each CPU. + * The fast path will pick up the most recent values at the next scheduling + * event (i.e. enqueue/dequeue). 
+ * + * | + * SLOW PATH | FAST PATH + * CGroup add/update/remove | Scheduler enqueue/dequeue events + * | + * | + * | DEFINE_PER_CPU(struct boost_groups) + * | +--------------+----+---+----+----+ + * | | idle | | | | | + * | | boost_max | | | | | + * | +---->lock | | | | | + * struct schedtune allocated_groups | | | group[ ] | | | | | + * +------------------------------+ +-------+ | | +--+---------+-+----+---+----+----+ + * | idx | | | | | | valid | + * | boost / prefer_idle | | | | | | boost | + * | perf_{boost/constraints}_idx | <---------+(*) | | | | tasks | <------------+ + * | css | +-------+ | | +---------+ | + * +-+----------------------------+ | | | | | | | + * ^ | | | | | | | + * | +-------+ | | +---------+ | + * | | | | | | | | + * | | | | | | | | + * | +-------+ | | +---------+ | + * | zmalloc | | | | | | | + * | | | | | | | | + * | +-------+ | | +---------+ | + * + BOOSTGROUPS_COUNT | | BOOSTGROUPS_COUNT | + * schedtune_boostgroup_init() | + | + * | schedtune_{en,de}queue_task() | + * | + + * | schedtune_tasks_update() + * | */ /* SchdTune tunables for a group of tasks */ @@ -226,6 +284,8 @@ struct boost_groups { /* Maximum boost value for all RUNNABLE tasks on a CPU */ int boost_max; struct { + /* True when this boost group maps an actual cgroup */ + bool valid; /* The boost for tasks on that boost group */ int boost; /* Count of RUNNABLE tasks on that boost group */ @@ -250,6 +310,11 @@ schedtune_cpu_update(int cpu) /* The root boost group is always active */ boost_max = bg->group[0].boost; for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) { + + /* Ignore boost groups not mapping a cgroup */ + if (!bg->group[idx].valid) + continue; + /* * A boost group affects a CPU only if it has * RUNNABLE tasks on that CPU @@ -259,6 +324,7 @@ schedtune_cpu_update(int cpu) boost_max = max(boost_max, bg->group[idx].boost); } + /* Ensures boost_max is non-negative when all cgroup boost values * are neagtive. 
Avoids under-accounting of cpu capacity which may cause * task stacking and frequency spikes.*/ @@ -278,6 +344,9 @@ schedtune_boostgroup_update(int idx, int boost) for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); + /* CGroups are never associated with inactive boost groups */ + BUG_ON(!bg->group[idx].valid); + /* * Keep track of current boost values to compute the per CPU * maximum only when it has been affected by the new value of @@ -642,7 +711,7 @@ schedtune_boostgroup_init(struct schedtune *st, int idx) for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); bg->group[idx].boost = 0; - bg->group[idx].tasks = 0; + bg->group[idx].valid = true; } /* Keep track of allocated boost groups */ @@ -697,6 +766,7 @@ schedtune_boostgroup_release(struct schedtune *st) /* Reset per CPUs boost group support */ for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); + bg->group[st->idx].valid = false; bg->group[st->idx].boost = 0; } @@ -733,6 +803,7 @@ schedtune_init_cgroups(void) for_each_possible_cpu(cpu) { bg = &per_cpu(cpu_boost_groups, cpu); memset(bg, 0, sizeof(struct boost_groups)); + bg->group[0].valid = true; raw_spin_lock_init(&bg->lock); }