From a264d8efcb8f7231898dc4cb306ef8e20e6e285e Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 19 Apr 2023 10:58:36 -0700 Subject: [PATCH] BACKPORT: mm: do not increment pgfault stats when page fault handler retries If the page fault handler requests a retry, we will count the fault multiple times. This is a relatively harmless problem as the retry paths are not often requested, and the only user-visible problem is that the fault counter will be slightly higher than it should be. Nevertheless, userspace only took one fault, and should not see the fact that the kernel had to retry the fault multiple times. Move page fault accounting into mm_account_fault() and skip incomplete faults which will be accounted upon completion. Link: https://lkml.kernel.org/r/20230419175836.3857458-1-surenb@google.com Fixes: d065bd810b6d ("mm: retry page fault when blocking on disk transfer") Signed-off-by: Suren Baghdasaryan Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Peter Xu Cc: Davidlohr Bueso Cc: Jan Kara Cc: Johannes Weiner Cc: Josef Bacik Cc: Laurent Dufour Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Michel Lespinasse Cc: Minchan Kim Cc: Punit Agrawal Cc: Vlastimil Babka Signed-off-by: Andrew Morton (cherry picked from commit 53156443a30368c0759c22e54a8d5cacc1b543cc) [surenb: resolve differences in handle_mm_fault() between 6.1 and 6.4 kernel versions] Bug: 161210518 Change-Id: Ic8cd807128ffd2c77a4db2af85b64bc24cc5052b Signed-off-by: Suren Baghdasaryan --- mm/memory.c | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index a0c585683f8a..16063c490b7f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5142,24 +5142,31 @@ retry_pud: * updates. However, note that the handling of PERF_COUNT_SW_PAGE_FAULTS should * still be in per-arch page fault handlers at the entry of page fault. */ -static inline void mm_account_fault(struct pt_regs *regs, +static inline void mm_account_fault(struct mm_struct *mm, struct pt_regs *regs, unsigned long address, unsigned int flags, vm_fault_t ret) { bool major; + /* Incomplete faults will be accounted upon completion. */ + if (ret & VM_FAULT_RETRY) + return; + /* - * We don't do accounting for some specific faults: - * - * - Unsuccessful faults (e.g. when the address wasn't valid). That - * includes arch_vma_access_permitted() failing before reaching here. - * So this is not a "this many hardware page faults" counter. We - * should use the hw profiling for that. - * - * - Incomplete faults (VM_FAULT_RETRY). They will only be counted - * once they're completed. + * To preserve the behavior of older kernels, PGFAULT counters record + * both successful and failed faults, as opposed to perf counters, + * which ignore failed cases. */ - if (ret & (VM_FAULT_ERROR | VM_FAULT_RETRY)) + count_vm_event(PGFAULT); + count_memcg_event_mm(mm, PGFAULT); + + /* + * Do not account for unsuccessful faults (e.g. when the address wasn't + * valid). That includes arch_vma_access_permitted() failing before + * reaching here. So this is not a "this many hardware page faults" + * counter. We should use the hw profiling for that. + */ + if (ret & VM_FAULT_ERROR) return; /* @@ -5218,20 +5225,21 @@ static void lru_gen_exit_fault(void) vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs) { + /* If the fault handler drops the mmap_lock, vma may be freed */ + struct mm_struct *mm = vma->vm_mm; vm_fault_t ret; __set_current_state(TASK_RUNNING); - count_vm_event(PGFAULT); - count_memcg_event_mm(vma->vm_mm, PGFAULT); - /* do counter updates before entering really critical section. */ check_sync_rss_stat(current); if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, flags & FAULT_FLAG_INSTRUCTION, - flags & FAULT_FLAG_REMOTE)) - return VM_FAULT_SIGSEGV; + flags & FAULT_FLAG_REMOTE)) { + ret = VM_FAULT_SIGSEGV; + goto out; + } /* * Enable the memcg OOM handling for faults triggered in user @@ -5260,8 +5268,8 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) mem_cgroup_oom_synchronize(false); } - - mm_account_fault(regs, address, flags, ret); +out: + mm_account_fault(mm, regs, address, flags, ret); return ret; }