From f8b57162d8b5847f0d75fbf099cb45eaed17fa49 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:05 -0700 Subject: [PATCH] BACKPORT: mm: multi-gen LRU: clarify scan_control flags Among the flags in scan_control: 1. sc->may_swap, which indicates swap constraint due to memsw.max, is supported as usual. 2. sc->proactive, which indicates reclaim by memory.reclaim, may not opportunistically skip the aging path, since it is considered less latency sensitive. 3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers swappiness to prioritize file LRU, since clean file pages are more likely to exist. 4. sc->may_writepage and sc->may_unmap, which indicates opportunistic reclaim, are rejected, since unmapped clean pages are already prioritized. Scanning for more of them is likely futile and can cause high reclaim latency when there is a large number of memcgs. The rest are handled by the existing code. Link: https://lkml.kernel.org/r/20221222041905.2431096-8-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michael Larabel Cc: Michal Hocko Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton Bug: 274865848 (cherry picked from commit e9d4e1ee788097484606c32122f146d802a9c5fb) [TJ: Resolved conflict over 650081b837713d042d4d7bd6b43993ca4852d00c ("ANDROID: MGLRU: Don't skip anon reclaim if swap low")] [Yu: Resolve conflicts over absence of folios and proactive reclaim on 5.15] Change-Id: I75dd554826ccd4b5c0e4fc497f40d9e06d6c3673 Signed-off-by: T.J. Mercier --- mm/vmscan.c | 55 +++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 91b28a3b36b4..e598a769fc46 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2954,6 +2954,9 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); + if (!sc->may_swap) + return 0; + if (!can_demote(pgdat->node_id, sc)) return 0; @@ -3997,7 +4000,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ } while (err == -EAGAIN); } -static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) +static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc) { struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; @@ -4005,7 +4008,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) VM_WARN_ON_ONCE(walk); walk = &pgdat->mm_walk; - } else if (!pgdat && !walk) { + } else if (!walk && force_alloc) { VM_WARN_ON_ONCE(current_is_kswapd()); walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); @@ -4191,7 +4194,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, goto done; } - walk = set_mm_walk(NULL); + walk = set_mm_walk(NULL, true); if (!walk) { success = iterate_mm_list_nowalk(lruvec, max_seq); goto done; @@ -4260,8 +4263,6 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MIN_SEQ(lruvec); - VM_WARN_ON_ONCE(sc->memcg_low_reclaim); - /* see the comment on lru_gen_page */ gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); @@ -4517,12 +4518,8 @@ static bool isolate_page(struct lruvec *lruvec, struct page *page, struct scan_c { bool success; - /* unmapping inhibited */ - if (!sc->may_unmap && page_mapped(page)) - return false; - /* swapping inhibited */ - if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && + if (!(sc->gfp_mask & __GFP_IO) && (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page)))) return false; @@ -4619,9 +4616,8 @@ static int scan_pages(struct lruvec *lruvec, struct scan_control *sc, __count_vm_events(PGSCAN_ANON + type, isolated); /* - * There might not be eligible pages due to reclaim_idx, may_unmap and - * may_writepage. Check the remaining to prevent livelock if it's not - * making progress. + * There might not be eligible pages due to reclaim_idx. Check the + * remaining to prevent livelock if it's not making progress. */ return isolated || !remaining ? scanned : 0; } @@ -4881,8 +4877,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MAX_SEQ(lruvec); - if (mem_cgroup_below_min(memcg) || - (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) + if (mem_cgroup_below_min(memcg)) return 0; if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) @@ -4910,17 +4905,14 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) long nr_to_scan; unsigned long scanned = 0; unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); + int swappiness = get_swappiness(lruvec, sc); + + /* clean file pages are more likely to exist */ + if (swappiness && !(sc->gfp_mask & __GFP_IO)) + swappiness = 1; while (true) { int delta; - int swappiness; - - if (sc->may_swap) - swappiness = get_swappiness(lruvec, sc); - else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc)) - swappiness = 1; - else - swappiness = 0; nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); if (nr_to_scan <= 0) @@ -5050,12 +5042,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc struct blk_plug plug; VM_WARN_ON_ONCE(global_reclaim(sc)); + VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap); lru_add_drain(); blk_start_plug(&plug); - set_mm_walk(lruvec_pgdat(lruvec)); + set_mm_walk(NULL, false); if (try_to_shrink_lruvec(lruvec, sc)) lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG); @@ -5111,11 +5104,19 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control * VM_WARN_ON_ONCE(!global_reclaim(sc)); + /* + * Unmapped clean pages are already prioritized. Scanning for more of + * them is likely futile and can cause high reclaim latency when there + * is a large number of memcgs. + */ + if (!sc->may_writepage || !sc->may_unmap) + goto done; + lru_add_drain(); blk_start_plug(&plug); - set_mm_walk(pgdat); + set_mm_walk(pgdat, false); set_initial_priority(pgdat, sc); @@ -5133,7 +5134,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control * clear_mm_walk(); blk_finish_plug(&plug); - +done: /* kswapd should never fail */ pgdat->kswapd_failures = 0; } @@ -5705,7 +5706,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, set_task_reclaim_state(current, &sc.reclaim_state); flags = memalloc_noreclaim_save(); blk_start_plug(&plug); - if (!set_mm_walk(NULL)) { + if (!set_mm_walk(NULL, true)) { err = -ENOMEM; goto done; }