From e931b1d2225cd7eea4586c2f2482c4415dd7ccf1 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 7 Nov 2022 16:00:54 -0800 Subject: [PATCH] Revert "FROMLIST: mm: multi-gen LRU: minimal implementation" This reverts commit a1537a68c5010553002f62fad91c7c0c886c73d8. To be replaced with upstream version. Bug: 249601646 Change-Id: I3dfbb3ec56cfdb5a2db7ec00c124dae471cce932 Signed-off-by: Kalesh Singh --- include/linux/mm_inline.h | 24 -- include/linux/mmzone.h | 41 -- kernel/bounds.c | 2 +- mm/Kconfig | 11 - mm/swap.c | 42 -- mm/vmscan.c | 806 +------------------------------------- mm/workingset.c | 119 +----- 7 files changed, 4 insertions(+), 1041 deletions(-) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 1b78e23f7e1c..9bacdf85c8af 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -107,19 +107,6 @@ static inline int lru_gen_from_seq(unsigned long seq) return seq % MAX_NR_GENS; } -static inline int lru_hist_from_seq(unsigned long seq) -{ - return seq % NR_HIST_GENS; -} - -static inline int lru_tier_from_refs(int refs) -{ - VM_BUG_ON(refs > BIT(LRU_REFS_WIDTH)); - - /* see the comment on MAX_NR_TIERS */ - return order_base_2(refs + 1); -} - static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) { unsigned long max_seq = lruvec->lrugen.max_seq; @@ -165,15 +152,6 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct page *page, __update_lru_size(lruvec, lru, zone, -delta); return; } - - /* promotion */ - if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) { - __update_lru_size(lruvec, lru, zone, -delta); - __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta); - } - - /* demotion requires isolation, e.g., lru_deactivate_fn() */ - VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); } static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming) @@ -238,8 +216,6 @@ static inline bool lru_gen_del_page(struct lruvec *lruvec, struct page *page, bo gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; new_flags &= ~LRU_GEN_MASK; - if (!(new_flags & BIT(PG_referenced))) - new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); /* for shrink_page_list() */ if (reclaiming) new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim)); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a73f1f98766e..f2cb725919bd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -328,34 +328,12 @@ enum lruvec_flags { #define MIN_NR_GENS 2U #define MAX_NR_GENS 4U -/* - * Each generation is divided into multiple tiers. Tiers represent different - * ranges of numbers of accesses through file descriptors. A page accessed N - * times through file descriptors is in tier order_base_2(N). A page in the - * first tier (N=0,1) is marked by PG_referenced unless it was faulted in - * though page tables or read ahead. A page in any other tier (N>1) is marked - * by PG_referenced and PG_workingset. - * - * In contrast to moving across generations which requires the LRU lock, moving - * across tiers only requires operations on page->flags and therefore has a - * negligible cost in the buffered access path. In the eviction path, - * comparisons of refaulted/(evicted+protected) from the first tier and the - * rest infer whether pages accessed multiple times through file descriptors - * are statistically hot and thus worth protecting. - * - * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice of the - * categories of the active/inactive LRU when keeping track of accesses through - * file descriptors. It requires MAX_NR_TIERS-2 additional bits in page->flags. - */ -#define MAX_NR_TIERS 4U - #ifndef __GENERATING_BOUNDS_H struct lruvec; #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) -#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) #ifdef CONFIG_LRU_GEN @@ -364,16 +342,6 @@ enum { LRU_GEN_FILE, }; -#define MIN_LRU_BATCH BITS_PER_LONG -#define MAX_LRU_BATCH (MIN_LRU_BATCH * 128) - -/* whether to keep historical stats from evicted generations */ -#ifdef CONFIG_LRU_GEN_STATS -#define NR_HIST_GENS MAX_NR_GENS -#else -#define NR_HIST_GENS 1U -#endif - /* * The youngest generation number is stored in max_seq for both anon and file * types as they are aged on an equal footing. The oldest generation numbers are @@ -393,15 +361,6 @@ struct lru_gen_struct { struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the sizes of the above lists */ unsigned long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; - /* the exponential moving average of refaulted */ - unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; - /* the exponential moving average of evicted+protected */ - unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; - /* the first tier doesn't need protection, hence the minus one */ - unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; - /* can be modified without holding the LRU lock */ - atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; - atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; }; void lru_gen_init_lruvec(struct lruvec *lruvec); diff --git a/kernel/bounds.c b/kernel/bounds.c index 10dd9e6b03e5..e08fb89f87f4 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -24,7 +24,7 @@ int main(void) DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); #ifdef CONFIG_LRU_GEN DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1)); - DEFINE(LRU_REFS_WIDTH, MAX_NR_TIERS - 2); + DEFINE(LRU_REFS_WIDTH, 0); #else DEFINE(LRU_GEN_WIDTH, 0); DEFINE(LRU_REFS_WIDTH, 0); diff --git a/mm/Kconfig b/mm/Kconfig index 0eabdde788ec..724a538d82dc 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -923,7 +923,6 @@ config ANON_VMA_NAME area from being merged with adjacent virtual memory areas due to the difference in their name. -# multi-gen LRU { config LRU_GEN bool "Multi-Gen LRU" depends on MMU @@ -932,16 +931,6 @@ config LRU_GEN help A high performance LRU implementation to overcommit memory. -config LRU_GEN_STATS - bool "Full stats for debugging" - depends on LRU_GEN - help - Do not enable this option unless you plan to look at historical stats - from evicted generations for debugging purpose. - - This option has a per-memcg and per-node memory overhead. -# } - source "mm/damon/Kconfig" config ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT diff --git a/mm/swap.c b/mm/swap.c index d301ef965ebf..6fb0c11255b6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -389,43 +389,6 @@ static void __lru_cache_activate_page(struct page *page) local_unlock(&lru_pvecs.lock); } -#ifdef CONFIG_LRU_GEN -static void page_inc_refs(struct page *page) -{ - unsigned long refs; - unsigned long old_flags, new_flags; - - if (PageUnevictable(page)) - return; - - /* see the comment on MAX_NR_TIERS */ - do { - new_flags = old_flags = READ_ONCE(page->flags); - - if (!(new_flags & BIT(PG_referenced))) { - new_flags |= BIT(PG_referenced); - continue; - } - - if (!(new_flags & BIT(PG_workingset))) { - new_flags |= BIT(PG_workingset); - continue; - } - - refs = new_flags & LRU_REFS_MASK; - refs = min(refs + BIT(LRU_REFS_PGOFF), LRU_REFS_MASK); - - new_flags &= ~LRU_REFS_MASK; - new_flags |= refs; - } while (new_flags != old_flags && - cmpxchg(&page->flags, old_flags, new_flags) != old_flags); -} -#else -static void page_inc_refs(struct page *page) -{ -} -#endif /* CONFIG_LRU_GEN */ - /* * Mark a page as having seen activity. * @@ -440,11 +403,6 @@ void mark_page_accessed(struct page *page) { page = compound_head(page); - if (lru_gen_enabled()) { - page_inc_refs(page); - return; - } - if (!PageReferenced(page)) { SetPageReferenced(page); } else if (PageUnevictable(page)) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 9b13a8df850f..61098ae8d1c3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1153,11 +1153,9 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; - - /* get a shadow entry before mem_cgroup_swapout() clears page_memcg() */ + mem_cgroup_swapout(page, swap); if (reclaimed && !mapping_exiting(mapping)) shadow = workingset_eviction(page, target_memcg); - mem_cgroup_swapout(page, swap); __delete_from_swap_cache(page, swap, shadow); xa_unlock_irq(&mapping->i_pages); put_swap_page(page, swap); @@ -2585,9 +2583,6 @@ static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) unsigned long file; struct lruvec *target_lruvec; - if (lru_gen_enabled()) - return; - target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); /* @@ -2918,38 +2913,11 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, * shorthand helpers ******************************************************************************/ -#define DEFINE_MAX_SEQ(lruvec) \ - unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq) - -#define DEFINE_MIN_SEQ(lruvec) \ - unsigned long min_seq[ANON_AND_FILE] = { \ - READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_ANON]), \ - READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \ - } - #define for_each_gen_type_zone(gen, type, zone) \ for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \ for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) -static int page_lru_gen(struct page *page) -{ - unsigned long flags = READ_ONCE(page->flags); - - return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; -} - -static int page_lru_tier(struct page *page) -{ - int refs; - unsigned long flags = READ_ONCE(page->flags); - - refs = (flags & LRU_REFS_FLAGS) == LRU_REFS_FLAGS ? - ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1 : 0; - - return lru_tier_from_refs(refs); -} - static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) { struct pglist_data *pgdat = NODE_DATA(nid); @@ -2970,755 +2938,6 @@ static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) return pgdat ? &pgdat->__lruvec : NULL; } -static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc) -{ - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - - if (!can_demote(pgdat->node_id, sc) && - mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) - return 0; - - return mem_cgroup_swappiness(memcg); -} - -static int get_nr_gens(struct lruvec *lruvec, int type) -{ - return lruvec->lrugen.max_seq - lruvec->lrugen.min_seq[type] + 1; -} - -static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) -{ - /* see the comment on lru_gen_struct */ - return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS && - get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) && - get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS; -} - -/****************************************************************************** - * refault feedback loop - ******************************************************************************/ - -/* - * A feedback loop based on Proportional-Integral-Derivative (PID) controller. - * - * The P term is refaulted/(evicted+protected) from a tier in the generation - * currently being evicted; the I term is the exponential moving average of the - * P term over the generations previously evicted, using the smoothing factor - * 1/2; the D term isn't supported. - * - * The setpoint (SP) is always the first tier of one type; the process variable - * (PV) is either any tier of the other type or any other tier of the same - * type. - * - * The error is the difference between the SP and the PV; the correction is - * turn off protection when SP>PV or turn on protection when SPlrugen; - int hist = lru_hist_from_seq(lrugen->min_seq[type]); - - pos->refaulted = lrugen->avg_refaulted[type][tier] + - atomic_long_read(&lrugen->refaulted[hist][type][tier]); - pos->total = lrugen->avg_total[type][tier] + - atomic_long_read(&lrugen->evicted[hist][type][tier]); - if (tier) - pos->total += lrugen->protected[hist][type][tier - 1]; - pos->gain = gain; -} - -static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover) -{ - int hist, tier; - struct lru_gen_struct *lrugen = &lruvec->lrugen; - bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1; - unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1; - - lockdep_assert_held(&lruvec->lru_lock); - - if (!carryover && !clear) - return; - - hist = lru_hist_from_seq(seq); - - for (tier = 0; tier < MAX_NR_TIERS; tier++) { - if (carryover) { - unsigned long sum; - - sum = lrugen->avg_refaulted[type][tier] + - atomic_long_read(&lrugen->refaulted[hist][type][tier]); - WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2); - - sum = lrugen->avg_total[type][tier] + - atomic_long_read(&lrugen->evicted[hist][type][tier]); - if (tier) - sum += lrugen->protected[hist][type][tier - 1]; - WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2); - } - - if (clear) { - atomic_long_set(&lrugen->refaulted[hist][type][tier], 0); - atomic_long_set(&lrugen->evicted[hist][type][tier], 0); - if (tier) - WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0); - } - } -} - -static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv) -{ - /* - * Return true if the PV has a limited number of refaults or a lower - * refaulted/total than the SP. - */ - return pv->refaulted < MIN_LRU_BATCH || - pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <= - (sp->refaulted + 1) * pv->total * pv->gain; -} - -/****************************************************************************** - * the aging - ******************************************************************************/ - -static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming) -{ - unsigned long old_flags, new_flags; - int type = page_is_file_lru(page); - struct lru_gen_struct *lrugen = &lruvec->lrugen; - int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); - - do { - new_flags = old_flags = READ_ONCE(page->flags); - VM_BUG_ON_PAGE(!(new_flags & LRU_GEN_MASK), page); - - new_gen = (old_gen + 1) % MAX_NR_GENS; - - new_flags &= ~LRU_GEN_MASK; - new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF; - new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); - /* for end_page_writeback() */ - if (reclaiming) - new_flags |= BIT(PG_reclaim); - } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); - - lru_gen_update_size(lruvec, page, old_gen, new_gen); - - return new_gen; -} - -static void inc_min_seq(struct lruvec *lruvec) -{ - int type; - struct lru_gen_struct *lrugen = &lruvec->lrugen; - - VM_BUG_ON(!seq_is_valid(lruvec)); - - for (type = 0; type < ANON_AND_FILE; type++) { - if (get_nr_gens(lruvec, type) != MAX_NR_GENS) - continue; - - reset_ctrl_pos(lruvec, type, true); - WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1); - } -} - -static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) -{ - int gen, type, zone; - bool success = false; - struct lru_gen_struct *lrugen = &lruvec->lrugen; - DEFINE_MIN_SEQ(lruvec); - - VM_BUG_ON(!seq_is_valid(lruvec)); - - for (type = !can_swap; type < ANON_AND_FILE; type++) { - while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) { - gen = lru_gen_from_seq(min_seq[type]); - - for (zone = 0; zone < MAX_NR_ZONES; zone++) { - if (!list_empty(&lrugen->lists[gen][type][zone])) - goto next; - } - - min_seq[type]++; - } -next: - ; - } - - /* see the comment on lru_gen_struct */ - if (can_swap) { - min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]); - min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]); - } - - for (type = !can_swap; type < ANON_AND_FILE; type++) { - if (min_seq[type] == lrugen->min_seq[type]) - continue; - - reset_ctrl_pos(lruvec, type, true); - WRITE_ONCE(lrugen->min_seq[type], min_seq[type]); - success = true; - } - - return success; -} - -static void inc_max_seq(struct lruvec *lruvec, unsigned long max_seq) -{ - int prev, next; - int type, zone; - struct lru_gen_struct *lrugen = &lruvec->lrugen; - - spin_lock_irq(&lruvec->lru_lock); - - VM_BUG_ON(!seq_is_valid(lruvec)); - - if (max_seq != lrugen->max_seq) - goto unlock; - - inc_min_seq(lruvec); - - /* - * Update the active/inactive LRU sizes for compatibility. Both sides of - * the current max_seq need to be covered, since max_seq+1 can overlap - * with min_seq[LRU_GEN_ANON] if swapping is constrained. And if they do - * overlap, cold/hot inversion happens. This can be solved by moving - * pages from min_seq to min_seq+1 but is omitted for simplicity. - */ - prev = lru_gen_from_seq(lrugen->max_seq - 1); - next = lru_gen_from_seq(lrugen->max_seq + 1); - - for (type = 0; type < ANON_AND_FILE; type++) { - for (zone = 0; zone < MAX_NR_ZONES; zone++) { - enum lru_list lru = type * LRU_INACTIVE_FILE; - long delta = lrugen->nr_pages[prev][type][zone] - - lrugen->nr_pages[next][type][zone]; - - if (!delta) - continue; - - __update_lru_size(lruvec, lru, zone, delta); - __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -delta); - } - } - - for (type = 0; type < ANON_AND_FILE; type++) - reset_ctrl_pos(lruvec, type, false); - - /* make sure preceding modifications appear */ - smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); -unlock: - spin_unlock_irq(&lruvec->lru_lock); -} - -static long get_nr_evictable(struct lruvec *lruvec, unsigned long max_seq, - unsigned long *min_seq, bool can_swap, bool *need_aging) -{ - int gen, type, zone; - long old = 0; - long young = 0; - long total = 0; - struct lru_gen_struct *lrugen = &lruvec->lrugen; - - for (type = !can_swap; type < ANON_AND_FILE; type++) { - unsigned long seq; - - for (seq = min_seq[type]; seq <= max_seq; seq++) { - long size = 0; - - gen = lru_gen_from_seq(seq); - - for (zone = 0; zone < MAX_NR_ZONES; zone++) - size += READ_ONCE(lrugen->nr_pages[gen][type][zone]); - - total += size; - if (seq == max_seq) - young += size; - if (seq + MIN_NR_GENS == max_seq) - old += size; - } - } - - /* - * The aging and the eviction is a typical producer-consumer model. The - * aging tries to be lazy to reduce the unnecessary overhead. On the - * other hand, the eviction stalls when the number of generations - * reaches MIN_NR_GENS. So ideally, there should be MIN_NR_GENS+1 - * generations, hence the first two if's. - * - * In addition, it's ideal to spread pages out evenly, meaning - * 1/(MIN_NR_GENS+1) of the total number of pages for each generation. A - * reasonable range for this average portion would [1/MIN_NR_GENS, - * 1/(MIN_NR_GENS+2)]. From the consumer's POV, the eviction only cares - * about the lower bound of cold pages, i.e., 1/(MIN_NR_GENS+2), whereas - * from the producer's POV, the aging only cares about the upper bound - * of hot pages, i.e., 1/MIN_NR_GENS. - */ - if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS > max_seq) - *need_aging = true; - else if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS < max_seq) - *need_aging = false; - else if (young * MIN_NR_GENS > total) - *need_aging = true; - else if (old * (MIN_NR_GENS + 2) < total) - *need_aging = true; - else - *need_aging = false; - - return total > 0 ? total : 0; -} - -static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc) -{ - bool need_aging; - long nr_to_scan; - int swappiness = get_swappiness(lruvec, sc); - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - DEFINE_MAX_SEQ(lruvec); - DEFINE_MIN_SEQ(lruvec); - - mem_cgroup_calculate_protection(NULL, memcg); - - if (mem_cgroup_below_min(memcg)) - return; - - nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, swappiness, &need_aging); - if (!nr_to_scan) - return; - - nr_to_scan >>= sc->priority; - - if (!mem_cgroup_online(memcg)) - nr_to_scan++; - - if (nr_to_scan && need_aging && (!mem_cgroup_below_low(memcg) || sc->memcg_low_reclaim)) - inc_max_seq(lruvec, max_seq); -} - -static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) -{ - struct mem_cgroup *memcg; - - VM_BUG_ON(!current_is_kswapd()); - - memcg = mem_cgroup_iter(NULL, NULL, NULL); - do { - struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); - - age_lruvec(lruvec, sc); - - cond_resched(); - } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); -} - -/****************************************************************************** - * the eviction - ******************************************************************************/ - -static bool sort_page(struct lruvec *lruvec, struct page *page, int tier_idx) -{ - bool success; - int gen = page_lru_gen(page); - int type = page_is_file_lru(page); - int zone = page_zonenum(page); - int tier = page_lru_tier(page); - int delta = thp_nr_pages(page); - struct lru_gen_struct *lrugen = &lruvec->lrugen; - - VM_BUG_ON_PAGE(gen >= MAX_NR_GENS, page); - - if (!page_evictable(page)) { - success = lru_gen_del_page(lruvec, page, true); - VM_BUG_ON_PAGE(!success, page); - SetPageUnevictable(page); - add_page_to_lru_list(page, lruvec); - __count_vm_events(UNEVICTABLE_PGCULLED, delta); - return true; - } - - if (type == LRU_GEN_FILE && PageAnon(page) && PageDirty(page)) { - success = lru_gen_del_page(lruvec, page, true); - VM_BUG_ON_PAGE(!success, page); - SetPageSwapBacked(page); - add_page_to_lru_list_tail(page, lruvec); - return true; - } - - if (tier > tier_idx) { - int hist = lru_hist_from_seq(lrugen->min_seq[type]); - - gen = page_inc_gen(lruvec, page, false); - list_move_tail(&page->lru, &lrugen->lists[gen][type][zone]); - - WRITE_ONCE(lrugen->protected[hist][type][tier - 1], - lrugen->protected[hist][type][tier - 1] + delta); - __mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta); - return true; - } - - if (PageLocked(page) || PageWriteback(page) || - (type == LRU_GEN_FILE && PageDirty(page))) { - gen = page_inc_gen(lruvec, page, true); - list_move(&page->lru, &lrugen->lists[gen][type][zone]); - return true; - } - - return false; -} - -static bool isolate_page(struct lruvec *lruvec, struct page *page, struct scan_control *sc) -{ - bool success; - - if (!sc->may_unmap && page_mapped(page)) - return false; - - if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && - (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page)))) - return false; - - if (!get_page_unless_zero(page)) - return false; - - if (!TestClearPageLRU(page)) { - put_page(page); - return false; - } - - success = lru_gen_del_page(lruvec, page, true); - VM_BUG_ON_PAGE(!success, page); - - return true; -} - -static int scan_pages(struct lruvec *lruvec, struct scan_control *sc, - int type, int tier, struct list_head *list) -{ - int gen, zone; - enum vm_event_item item; - int sorted = 0; - int scanned = 0; - int isolated = 0; - int remaining = MAX_LRU_BATCH; - struct lru_gen_struct *lrugen = &lruvec->lrugen; - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - - VM_BUG_ON(!list_empty(list)); - - if (get_nr_gens(lruvec, type) == MIN_NR_GENS) - return 0; - - gen = lru_gen_from_seq(lrugen->min_seq[type]); - - for (zone = sc->reclaim_idx; zone >= 0; zone--) { - LIST_HEAD(moved); - int skipped = 0; - struct list_head *head = &lrugen->lists[gen][type][zone]; - - while (!list_empty(head)) { - struct page *page = lru_to_page(head); - int delta = thp_nr_pages(page); - - VM_BUG_ON_PAGE(PageTail(page), page); - VM_BUG_ON_PAGE(PageUnevictable(page), page); - VM_BUG_ON_PAGE(PageActive(page), page); - VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page); - VM_BUG_ON_PAGE(page_zonenum(page) != zone, page); - - prefetchw_prev_lru_page(page, head, flags); - - scanned += delta; - - if (sort_page(lruvec, page, tier)) - sorted += delta; - else if (isolate_page(lruvec, page, sc)) { - list_add(&page->lru, list); - isolated += delta; - } else { - list_move(&page->lru, &moved); - skipped += delta; - } - - if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH) - break; - } - - if (skipped) { - list_splice(&moved, head); - __count_zid_vm_events(PGSCAN_SKIP, zone, skipped); - } - - if (!remaining || isolated >= MIN_LRU_BATCH) - break; - } - - item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT; - if (!cgroup_reclaim(sc)) { - __count_vm_events(item, isolated); - __count_vm_events(PGREFILL, sorted); - } - __count_memcg_events(memcg, item, isolated); - __count_memcg_events(memcg, PGREFILL, sorted); - __count_vm_events(PGSCAN_ANON + type, isolated); - - /* - * There might not be eligible pages due to reclaim_idx, may_unmap and - * may_writepage. Check the remaining to prevent livelock if there is no - * progress. - */ - return isolated || !remaining ? scanned : 0; -} - -static int get_tier_idx(struct lruvec *lruvec, int type) -{ - int tier; - struct ctrl_pos sp, pv; - - /* - * To leave a margin for fluctuations, use a larger gain factor (1:2). - * This value is chosen because any other tier would have at least twice - * as many refaults as the first tier. - */ - read_ctrl_pos(lruvec, type, 0, 1, &sp); - for (tier = 1; tier < MAX_NR_TIERS; tier++) { - read_ctrl_pos(lruvec, type, tier, 2, &pv); - if (!positive_ctrl_err(&sp, &pv)) - break; - } - - return tier - 1; -} - -static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx) -{ - int type, tier; - struct ctrl_pos sp, pv; - int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness }; - - /* - * Compare the first tier of anon with that of file to determine which - * type to scan. Also need to compare other tiers of the selected type - * with the first tier of the other type to determine the last tier (of - * the selected type) to evict. - */ - read_ctrl_pos(lruvec, LRU_GEN_ANON, 0, gain[LRU_GEN_ANON], &sp); - read_ctrl_pos(lruvec, LRU_GEN_FILE, 0, gain[LRU_GEN_FILE], &pv); - type = positive_ctrl_err(&sp, &pv); - - read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp); - for (tier = 1; tier < MAX_NR_TIERS; tier++) { - read_ctrl_pos(lruvec, type, tier, gain[type], &pv); - if (!positive_ctrl_err(&sp, &pv)) - break; - } - - *tier_idx = tier - 1; - - return type; -} - -static int isolate_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness, - int *type_scanned, struct list_head *list) -{ - int i; - int type; - int scanned; - int tier = -1; - DEFINE_MIN_SEQ(lruvec); - - VM_BUG_ON(!seq_is_valid(lruvec)); - - /* - * Try to make the obvious choice first. When anon and file are both - * available from the same generation, interpret swappiness 1 as file - * first and 200 as anon first. - */ - if (!swappiness) - type = LRU_GEN_FILE; - else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) - type = LRU_GEN_ANON; - else if (swappiness == 1) - type = LRU_GEN_FILE; - else if (swappiness == 200) - type = LRU_GEN_ANON; - else - type = get_type_to_scan(lruvec, swappiness, &tier); - - for (i = !swappiness; i < ANON_AND_FILE; i++) { - if (tier < 0) - tier = get_tier_idx(lruvec, type); - - scanned = scan_pages(lruvec, sc, type, tier, list); - if (scanned) - break; - - type = !type; - tier = -1; - } - - *type_scanned = type; - - return scanned; -} - -static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness) -{ - int type; - int scanned; - int reclaimed; - LIST_HEAD(list); - struct page *page; - enum vm_event_item item; - struct reclaim_stat stat; - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - - spin_lock_irq(&lruvec->lru_lock); - - scanned = isolate_pages(lruvec, sc, swappiness, &type, &list); - - if (try_to_inc_min_seq(lruvec, swappiness)) - scanned++; - - if (get_nr_gens(lruvec, LRU_GEN_FILE) == MIN_NR_GENS) - scanned = 0; - - spin_unlock_irq(&lruvec->lru_lock); - - if (list_empty(&list)) - return scanned; - - reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false); - - /* - * To avoid livelock, don't add rejected pages back to the same lists - * they were isolated from. See lru_gen_add_page(). - */ - list_for_each_entry(page, &list, lru) { - ClearPageReferenced(page); - ClearPageWorkingset(page); - - if (PageReclaim(page) && (PageDirty(page) || PageWriteback(page))) - ClearPageActive(page); - else if (page_is_file_lru(page) || PageSwapCache(page)) - SetPageActive(page); - } - - spin_lock_irq(&lruvec->lru_lock); - - move_pages_to_lru(lruvec, &list); - - item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT; - if (!cgroup_reclaim(sc)) - __count_vm_events(item, reclaimed); - __count_memcg_events(memcg, item, reclaimed); - __count_vm_events(PGSTEAL_ANON + type, reclaimed); - - spin_unlock_irq(&lruvec->lru_lock); - - mem_cgroup_uncharge_list(&list); - free_unref_page_list(&list); - - sc->nr_reclaimed += reclaimed; - - return scanned; -} - -static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap) -{ - bool need_aging; - long nr_to_scan; - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - DEFINE_MAX_SEQ(lruvec); - DEFINE_MIN_SEQ(lruvec); - - if (mem_cgroup_below_min(memcg) || - (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) - return 0; - - nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, can_swap, &need_aging); - if (!nr_to_scan) - return 0; - - /* reset the priority if the target has been met */ - nr_to_scan >>= sc->nr_reclaimed < sc->nr_to_reclaim ? sc->priority : DEF_PRIORITY; - - if (!mem_cgroup_online(memcg)) - nr_to_scan++; - - if (!nr_to_scan) - return 0; - - if (!need_aging) - return nr_to_scan; - - /* leave the work to lru_gen_age_node() */ - if (current_is_kswapd()) - return 0; - - /* try other memcgs before going to the aging path */ - if (!cgroup_reclaim(sc) && !sc->force_deactivate) { - sc->skipped_deactivate = true; - return 0; - } - - inc_max_seq(lruvec, max_seq); - - return nr_to_scan; -} - -static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -{ - struct blk_plug plug; - long scanned = 0; - - lru_add_drain(); - - blk_start_plug(&plug); - - while (true) { - int delta; - int swappiness; - long nr_to_scan; - - if (sc->may_swap) - swappiness = get_swappiness(lruvec, sc); - else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc)) - swappiness = 1; - else - swappiness = 0; - - nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); - if (!nr_to_scan) - break; - - delta = evict_pages(lruvec, sc, swappiness); - if (!delta) - break; - - scanned += delta; - if (scanned >= nr_to_scan) - break; - - cond_resched(); - } - - blk_finish_plug(&plug); -} - /****************************************************************************** * initialization ******************************************************************************/ @@ -3761,16 +2980,6 @@ static int __init init_lru_gen(void) }; late_initcall(init_lru_gen); -#else - -static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) -{ -} - -static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -{ -} - #endif /* CONFIG_LRU_GEN */ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) @@ -3784,11 +2993,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) struct blk_plug plug; bool scan_adjusted; - if (lru_gen_enabled()) { - lru_gen_shrink_lruvec(lruvec, sc); - return; - } - get_scan_count(lruvec, sc, nr); /* Record the original scan target for proportional adjustments later */ @@ -4261,9 +3465,6 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) struct lruvec *target_lruvec; unsigned long refaults; - if (lru_gen_enabled()) - return; - target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); target_lruvec->refaults[0] = refaults; @@ -4635,11 +3836,6 @@ static void age_active_anon(struct pglist_data *pgdat, struct mem_cgroup *memcg; struct lruvec *lruvec; - if (lru_gen_enabled()) { - lru_gen_age_node(pgdat, sc); - return; - } - if (!can_age_anon_pages(pgdat, sc)) return; diff --git a/mm/workingset.c b/mm/workingset.c index d8fc67998957..880d882f3325 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -187,6 +187,7 @@ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, bool workingset) { + eviction >>= bucket_order; eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; @@ -211,116 +212,10 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, *memcgidp = memcgid; *pgdat = NODE_DATA(nid); - *evictionp = entry; + *evictionp = entry << bucket_order; *workingsetp = workingset; } -#ifdef CONFIG_LRU_GEN - -static int page_lru_refs(struct page *page) -{ - unsigned long flags = READ_ONCE(page->flags); - - BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT); - - /* see the comment on MAX_NR_TIERS */ - return flags & BIT(PG_workingset) ? (flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF : 0; -} - -static void *lru_gen_eviction(struct page *page) -{ - int hist, tier; - unsigned long token; - unsigned long min_seq; - struct lruvec *lruvec; - struct lru_gen_struct *lrugen; - int type = page_is_file_lru(page); - int refs = page_lru_refs(page); - int delta = thp_nr_pages(page); - bool workingset = PageWorkingset(page); - struct mem_cgroup *memcg = page_memcg(page); - struct pglist_data *pgdat = page_pgdat(page); - - lruvec = mem_cgroup_lruvec(memcg, pgdat); - lrugen = &lruvec->lrugen; - min_seq = READ_ONCE(lrugen->min_seq[type]); - token = (min_seq << LRU_REFS_WIDTH) | refs; - - hist = lru_hist_from_seq(min_seq); - tier = lru_tier_from_refs(refs + workingset); - atomic_long_add(delta, &lrugen->evicted[hist][type][tier]); - - return pack_shadow(mem_cgroup_id(memcg), pgdat, token, workingset); -} - -static void lru_gen_refault(struct page *page, void *shadow) -{ - int hist, tier, refs; - int memcg_id; - bool workingset; - unsigned long token; - unsigned long min_seq; - struct lruvec *lruvec; - struct lru_gen_struct *lrugen; - struct mem_cgroup *memcg; - struct pglist_data *pgdat; - int type = page_is_file_lru(page); - int delta = thp_nr_pages(page); - - unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset); - - refs = token & (BIT(LRU_REFS_WIDTH) - 1); - if (refs && !workingset) - return; - - if (page_pgdat(page) != pgdat) - return; - - rcu_read_lock(); - memcg = page_memcg_rcu(page); - if (mem_cgroup_id(memcg) != memcg_id) - goto unlock; - - token >>= LRU_REFS_WIDTH; - lruvec = mem_cgroup_lruvec(memcg, pgdat); - lrugen = &lruvec->lrugen; - min_seq = READ_ONCE(lrugen->min_seq[type]); - if (token != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH))) - goto unlock; - - hist = lru_hist_from_seq(min_seq); - tier = lru_tier_from_refs(refs + workingset); - atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]); - mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta); - - /* - * Count the following two cases as stalls: - * 1. For pages accessed through page tables, hotter pages pushed out - * hot pages which refaulted immediately. - * 2. For pages accessed through file descriptors, numbers of accesses - * might have been beyond the limit. - */ - if (lru_gen_in_fault() || refs + workingset == BIT(LRU_REFS_WIDTH)) { - SetPageWorkingset(page); - mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); - } -unlock: - rcu_read_unlock(); -} - -#else - -static void *lru_gen_eviction(struct page *page) -{ - return NULL; -} - -static void lru_gen_refault(struct page *page, void *shadow) -{ -} - -#endif /* CONFIG_LRU_GEN */ - /** * workingset_age_nonresident - age non-resident entries as LRU ages * @lruvec: the lruvec that was aged @@ -369,14 +264,10 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); - if (lru_gen_enabled()) - return lru_gen_eviction(page); - lruvec = mem_cgroup_lruvec(target_memcg, pgdat); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); - eviction >>= bucket_order; workingset_age_nonresident(lruvec, thp_nr_pages(page)); return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page)); } @@ -405,13 +296,7 @@ void workingset_refault(struct page *page, void *shadow) bool workingset; int memcgid; - if (lru_gen_enabled()) { - lru_gen_refault(page, shadow); - return; - } - unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); - eviction <<= bucket_order; rcu_read_lock(); /*