diff --git a/include/linux/mm.h b/include/linux/mm.h index 37dccbc5cafe..f75aca682130 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -549,6 +549,12 @@ struct vm_fault { * page table to avoid allocation from * atomic context. */ + /* + * These entries are required when handling speculative page fault. + * This way the page handling is done using consistent field values. + */ + unsigned long vma_flags; + pgprot_t vma_page_prot; }; /* page entry size for vm->huge_fault() */ @@ -982,9 +988,9 @@ void free_compound_page(struct page *page); * pte_mkwrite. But get_user_pages can cause write faults for mappings * that do not have writing enabled, when used by access_process_vm. */ -static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) +static inline pte_t maybe_mkwrite(pte_t pte, unsigned long vma_flags) { - if (likely(vma->vm_flags & VM_WRITE)) + if (likely(vma_flags & VM_WRITE)) pte = pte_mkwrite(pte); return pte; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 85eda66eb625..8a512e4899a2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2130,7 +2130,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, entry = pte_swp_mkuffd_wp(entry); } else { entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot)); - entry = maybe_mkwrite(entry, vma); + entry = maybe_mkwrite(entry, vma->vm_flags); if (!write) entry = pte_wrprotect(entry); if (!young) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9a3f06cdcc2a..41805cbcb3ea 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4298,6 +4298,8 @@ retry: .vma = vma, .address = haddr, .flags = flags, + .vma_flags = vma->vm_flags, + .vma_page_prot = vma->vm_page_prot, /* * Hard to debug if it ends up being * used by a callee that assumes diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 5c27832b45b2..0022a28b84bb 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1000,6 +1000,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, .flags = FAULT_FLAG_ALLOW_RETRY, .pmd = pmd, .pgoff = linear_page_index(vma, address), + .vma_flags = vma->vm_flags, + .vma_page_prot = vma->vm_page_prot, }; vmf.pte = pte_offset_map(pmd, address); diff --git a/mm/memory.c b/mm/memory.c index 649807cf0510..0346797bb118 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -856,7 +856,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma /* All done, just insert the new page copy in the child */ pte = mk_pte(new_page, dst_vma->vm_page_prot); - pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma); + pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma->vm_flags); set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); return 0; } @@ -1962,7 +1962,8 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, goto out_unlock; } entry = pte_mkyoung(*pte); - entry = maybe_mkwrite(pte_mkdirty(entry), vma); + entry = maybe_mkwrite(pte_mkdirty(entry), + vma->vm_flags); if (ptep_set_access_flags(vma, addr, pte, entry, 1)) update_mmu_cache(vma, addr, pte); } @@ -1977,7 +1978,7 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, if (mkwrite) { entry = pte_mkyoung(entry); - entry = maybe_mkwrite(pte_mkdirty(entry), vma); + entry = maybe_mkwrite(pte_mkdirty(entry), vma->vm_flags); } set_pte_at(mm, addr, pte, entry); @@ -2844,7 +2845,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf) flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = pte_mkyoung(vmf->orig_pte); - entry = maybe_mkwrite(pte_mkdirty(entry), vma); + entry = maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags); if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) update_mmu_cache(vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -2935,9 +2936,9 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) inc_mm_counter_fast(mm, MM_ANONPAGES); } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); - entry = mk_pte(new_page, vma->vm_page_prot); + entry = mk_pte(new_page, vmf->vma_page_prot); entry = pte_sw_mkyoung(entry); - entry = maybe_mkwrite(pte_mkdirty(entry), vma); + entry = maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags); /* * Clear the pte entry and flush it first, before updating the * pte with the new entry. This will avoid a race condition @@ -3001,7 +3002,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) * Don't let another task, with possibly unlocked vma, * keep the mlocked page. */ - if (page_copied && (vma->vm_flags & VM_LOCKED)) { + if (page_copied && (vmf->vma_flags & VM_LOCKED)) { lock_page(old_page); /* LRU manipulation */ if (PageMlocked(old_page)) munlock_vma_page(old_page); @@ -3036,7 +3037,7 @@ out: */ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) { - WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); + WARN_ON_ONCE(!(vmf->vma_flags & VM_SHARED)); if (!pte_map_lock(vmf)) return VM_FAULT_RETRY; /* @@ -3145,7 +3146,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) * We should not cow pages in a shared writeable mapping. * Just mark the pages writable and/or call ops->pfn_mkwrite. */ - if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + if ((vmf->vma_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)) return wp_pfn_shared(vmf); @@ -3177,7 +3178,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) unlock_page(page); wp_page_reuse(vmf); return VM_FAULT_WRITE; - } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + } else if (unlikely((vmf->vma_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { return wp_page_shared(vmf); } @@ -3467,9 +3468,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); - pte = mk_pte(page, vma->vm_page_prot); + pte = mk_pte(page, vmf->vma_page_prot); if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { - pte = maybe_mkwrite(pte_mkdirty(pte), vma); + pte = maybe_mkwrite(pte_mkdirty(pte), vmf->vma_flags); vmf->flags &= ~FAULT_FLAG_WRITE; ret |= VM_FAULT_WRITE; exclusive = RMAP_EXCLUSIVE; @@ -3495,7 +3496,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) swap_free(entry); if (mem_cgroup_swap_full(page) || - (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) + (vmf->vma_flags & VM_LOCKED) || PageMlocked(page)) try_to_free_swap(page); unlock_page(page); if (page != swapcache && swapcache) { @@ -3550,7 +3551,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) pte_t entry; /* File mapping without ->vm_ops ? */ - if (vma->vm_flags & VM_SHARED) + if (vmf->vma_flags & VM_SHARED) return VM_FAULT_SIGBUS; /* @@ -3574,7 +3575,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), - vma->vm_page_prot)); + vmf->vma_page_prot)); if (!pte_map_lock(vmf)) return VM_FAULT_RETRY; if (!pte_none(*vmf->pte)) { @@ -3610,9 +3611,9 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) */ __SetPageUptodate(page); - entry = mk_pte(page, vma->vm_page_prot); + entry = mk_pte(page, vmf->vma_page_prot); entry = pte_sw_mkyoung(entry); - if (vma->vm_flags & VM_WRITE) + if (vmf->vma_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); if (!pte_map_lock(vmf)) { @@ -3820,7 +3821,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) for (i = 0; i < HPAGE_PMD_NR; i++) flush_icache_page(vma, page + i); - entry = mk_huge_pmd(page, vma->vm_page_prot); + entry = mk_huge_pmd(page, vmf->vma_page_prot); if (write) entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); @@ -3892,12 +3893,12 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page) } flush_icache_page(vma, page); - entry = mk_pte(page, vma->vm_page_prot); + entry = mk_pte(page, vmf->vma_page_prot); entry = pte_sw_mkyoung(entry); if (write) - entry = maybe_mkwrite(pte_mkdirty(entry), vma); + entry = maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags); /* copy-on-write page */ - if (write && !(vma->vm_flags & VM_SHARED)) { + if (write && !(vmf->vma_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, vmf->address, false); lru_cache_add_inactive_or_unevictable(page, vma); @@ -3936,7 +3937,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf) /* Did we COW the page? */ if ((vmf->flags & FAULT_FLAG_WRITE) && - !(vmf->vma->vm_flags & VM_SHARED)) + !(vmf->vma_flags & VM_SHARED)) page = vmf->cow_page; else page = vmf->page; @@ -4214,7 +4215,7 @@ static vm_fault_t do_fault(struct vm_fault *vmf) } } else if (!(vmf->flags & FAULT_FLAG_WRITE)) ret = do_read_fault(vmf); - else if (!(vma->vm_flags & VM_SHARED)) + else if (!(vmf->vma_flags & VM_SHARED)) ret = do_cow_fault(vmf); else ret = do_shared_fault(vmf); @@ -4271,7 +4272,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) * accessible ptes, some can allow access by kernel mode. */ old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); - pte = pte_modify(old_pte, vma->vm_page_prot); + pte = pte_modify(old_pte, vmf->vma_page_prot); pte = pte_mkyoung(pte); if (was_writable) pte = pte_mkwrite(pte); @@ -4305,7 +4306,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) * Flag if the page is shared between multiple address spaces. This * is later used when determining whether to group tasks together */ - if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) + if (page_mapcount(page) > 1 && (vmf->vma_flags & VM_SHARED)) flags |= TNF_SHARED; last_cpupid = page_cpupid_last(page); @@ -4511,6 +4512,8 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, .flags = flags, .pgoff = linear_page_index(vma, address), .gfp_mask = __get_fault_gfp_mask(vma), + .vma_flags = vma->vm_flags, + .vma_page_prot = vma->vm_page_prot, }; unsigned int dirty = flags & FAULT_FLAG_WRITE; struct mm_struct *mm = vma->vm_mm; diff --git a/mm/migrate.c b/mm/migrate.c index 8ea0c65f1075..2c391485f664 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -242,7 +242,7 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, */ entry = pte_to_swp_entry(*pvmw.pte); if (is_write_migration_entry(entry)) - pte = maybe_mkwrite(pte, vma); + pte = maybe_mkwrite(pte, vma->vm_flags); else if (pte_swp_uffd_wp(*pvmw.pte)) pte = pte_mkuffd_wp(pte);