From fed85f95734d08cd3d2bb1c8b904c2278c54e84e Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Fri, 26 May 2023 17:53:27 -0700 Subject: [PATCH] Revert "ANDROID: BACKPORT: mm: Multi-gen LRU: remove wait_event_killable()" This reverts commit 397665b3edde04b889bf8d24cde0b85cc8914342. Will be replaced by a closer-to-upstream version, and the ABI will be updated. Bug: 277906484 Change-Id: Ieabfeaad50ac5001f6a5b87c1dd1051d47bc40af Signed-off-by: Kalesh Singh --- include/linux/mmzone.h | 8 +-- mm/vmscan.c | 111 ++++++++++++++++++++++++++--------------- 2 files changed, 75 insertions(+), 44 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 77c66382d964..759eab6cbb94 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -460,17 +460,17 @@ enum { struct lru_gen_mm_state { /* set to max_seq after each iteration */ unsigned long seq; - /* where the current iteration continues after */ + /* where the current iteration continues (inclusive) */ struct list_head *head; - /* where the last iteration ended before */ + /* where the last iteration ended (exclusive) */ struct list_head *tail; - /* Unused - keep for ABI compatiiblity */ + /* to wait for the last page table walker to finish */ struct wait_queue_head wait; /* Bloom filters flip after each iteration */ unsigned long *filters[NR_BLOOM_FILTERS]; /* the mm stats for debugging */ unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; - /* Unused - keep for ABI compatiiblity */ + /* the number of concurrent page table walkers */ int nr_walkers; }; diff --git a/mm/vmscan.c b/mm/vmscan.c index 5968e19e9441..7a26a306cfb3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3051,13 +3051,18 @@ void lru_gen_del_mm(struct mm_struct *mm) if (!lruvec) continue; - /* where the current iteration continues after */ - if (lruvec->mm_state.head == &mm->lru_gen.list) - lruvec->mm_state.head = lruvec->mm_state.head->prev; - - /* where the last iteration ended before */ + /* where the last iteration ended (exclusive) */ 
if (lruvec->mm_state.tail == &mm->lru_gen.list) lruvec->mm_state.tail = lruvec->mm_state.tail->next; + + /* where the current iteration continues (inclusive) */ + if (lruvec->mm_state.head != &mm->lru_gen.list) + continue; + + lruvec->mm_state.head = lruvec->mm_state.head->next; + /* the deletion ends the current iteration */ + if (lruvec->mm_state.head == &mm_list->fifo) + WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1); } list_del_init(&mm->lru_gen.list); @@ -3241,54 +3246,68 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, struct mm_struct **iter) { bool first = false; - bool last = false; + bool last = true; struct mm_struct *mm = NULL; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct lru_gen_mm_list *mm_list = get_mm_list(memcg); struct lru_gen_mm_state *mm_state = &lruvec->mm_state; /* - * mm_state->seq is incremented after each iteration of mm_list. There - * are three interesting cases for this page table walker: - * 1. It tries to start a new iteration with a stale max_seq: there is - * nothing left to do. - * 2. It started the next iteration: it needs to reset the Bloom filter - * so that a fresh set of PTE tables can be recorded. - * 3. It ended the current iteration: it needs to reset the mm stats - * counters and tell its caller to increment max_seq. + * There are four interesting cases for this page table walker: + * 1. It tries to start a new iteration of mm_list with a stale max_seq; + * there is nothing left to do. + * 2. It's the first of the current generation, and it needs to reset + * the Bloom filter for the next generation. + * 3. It reaches the end of mm_list, and it needs to increment + * mm_state->seq; the iteration is done. + * 4. It's the last of the current generation, and it needs to reset the + * mm stats counters for the next generation. 
*/ spin_lock(&mm_list->lock); VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq); + VM_WARN_ON_ONCE(*iter && mm_state->seq > walk->max_seq); + VM_WARN_ON_ONCE(*iter && !mm_state->nr_walkers); - if (walk->max_seq <= mm_state->seq) + if (walk->max_seq <= mm_state->seq) { + if (!*iter) + last = false; goto done; + } - if (!mm_state->head) - mm_state->head = &mm_list->fifo; + if (!mm_state->nr_walkers) { + VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); - if (mm_state->head == &mm_list->fifo) + mm_state->head = mm_list->fifo.next; first = true; + } + + while (!mm && mm_state->head != &mm_list->fifo) { + mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); - do { mm_state->head = mm_state->head->next; - if (mm_state->head == &mm_list->fifo) { - WRITE_ONCE(mm_state->seq, mm_state->seq + 1); - last = true; - break; - } /* force scan for those added after the last iteration */ - if (!mm_state->tail || mm_state->tail == mm_state->head) { - mm_state->tail = mm_state->head->next; + if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) { + mm_state->tail = mm_state->head; walk->force_scan = true; } - mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); if (should_skip_mm(mm, walk)) mm = NULL; - } while (!mm); + } + + if (mm_state->head == &mm_list->fifo) + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); done: + if (*iter && !mm) + mm_state->nr_walkers--; + if (!*iter && mm) + mm_state->nr_walkers++; + + if (mm_state->nr_walkers) + last = false; + if (*iter || last) reset_mm_stats(lruvec, walk, last); @@ -3316,9 +3335,9 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq); - if (max_seq > mm_state->seq) { - mm_state->head = NULL; - mm_state->tail = NULL; + if (max_seq > mm_state->seq && !mm_state->nr_walkers) { + VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); reset_mm_stats(lruvec, 
NULL, true); success = true; @@ -3927,6 +3946,10 @@ restart: walk_pmd_range(&val, addr, next, args); + /* a racy check to curtail the waiting time */ + if (wq_has_sleeper(&walk->lruvec->mm_state.wait)) + return 1; + if (need_resched() || walk->batched >= MAX_LRU_BATCH) { end = (addr | ~PUD_MASK) + 1; goto done; @@ -3959,14 +3982,8 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ walk->next_addr = FIRST_USER_ADDRESS; do { - DEFINE_MAX_SEQ(lruvec); - err = -EBUSY; - /* another thread might have called inc_max_seq() */ - if (walk->max_seq != max_seq) - break; - /* page_update_gen() requires stable page_memcg() */ if (!mem_cgroup_trylock_pages(memcg)) break; @@ -4199,12 +4216,25 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, success = iterate_mm_list(lruvec, walk, &mm); if (mm) walk_mm(lruvec, mm, walk); + + cond_resched(); } while (mm); done: - if (success) - inc_max_seq(lruvec, can_swap, force_scan); + if (!success) { + if (sc->priority <= DEF_PRIORITY - 2) + wait_event_killable(lruvec->mm_state.wait, + max_seq < READ_ONCE(lrugen->max_seq)); + return false; + } - return success; + VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq)); + + inc_max_seq(lruvec, can_swap, force_scan); + /* either this sees any waiters or they will see updated max_seq */ + if (wq_has_sleeper(&lruvec->mm_state.wait)) + wake_up_all(&lruvec->mm_state.wait); + + return true; } static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc) @@ -5771,6 +5801,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) INIT_LIST_HEAD(&lrugen->pages[gen][type][zone]); lruvec->mm_state.seq = MIN_NR_GENS; + init_waitqueue_head(&lruvec->mm_state.wait); } #ifdef CONFIG_MEMCG