diff --git a/kernel/fork.c b/kernel/fork.c index 9ef103c05891..1109a10c5ccd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -659,7 +659,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, int retval; unsigned long charge = 0; LIST_HEAD(uf); - MA_STATE(old_mas, &oldmm->mm_mt, 0, 0); MA_STATE(mas, &mm->mm_mt, 0, 0); uprobe_start_dup_mmap(); @@ -687,16 +686,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto out; khugepaged_fork(mm, oldmm); - retval = mas_expected_entries(&mas, oldmm->map_count); - if (retval) + /* Use __mt_dup() to efficiently build an identical maple tree. */ + retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL); + if (unlikely(retval)) goto out; mt_clear_in_rcu(mas.tree); - mas_for_each(&old_mas, mpnt, ULONG_MAX) { + mas_for_each(&mas, mpnt, ULONG_MAX) { struct file *file; vma_start_write(mpnt); if (mpnt->vm_flags & VM_DONTCOPY) { + __mas_set_range(&mas, mpnt->vm_start, mpnt->vm_end - 1); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + if (unlikely(mas_is_err(&mas))) { + retval = -ENOMEM; + goto loop_out; + } vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); continue; } @@ -758,12 +764,13 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (is_vm_hugetlb_page(tmp)) hugetlb_dup_vma_private(tmp); - /* Link the vma into the MT */ + /* + * Link the vma into the MT. After using __mt_dup(), memory + * allocation is not necessary here, so it cannot fail. + */ mas.index = tmp->vm_start; mas.last = tmp->vm_end - 1; mas_store(&mas, tmp); - if (mas_is_err(&mas)) - goto fail_nomem_mas_store; mm->map_count++; if (!(tmp->vm_flags & VM_WIPEONFORK)) @@ -772,15 +779,28 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (tmp->vm_ops && tmp->vm_ops->open) tmp->vm_ops->open(tmp); - if (retval) + if (retval) { + mpnt = mas_find(&mas, ULONG_MAX); goto loop_out; + } } /* a new mm has just been created */ retval = arch_dup_mmap(oldmm, mm); loop_out: mas_destroy(&mas); - if (!retval) + if (!retval) { mt_set_in_rcu(mas.tree); + } else if (mpnt) { + /* + * The entire maple tree has already been duplicated. If the + * mmap duplication fails, mark the failure point with + * XA_ZERO_ENTRY. In exit_mmap(), if this marker is encountered, + * stop releasing VMAs that have not been duplicated after this + * point. + */ + mas_set_range(&mas, mpnt->vm_start, mpnt->vm_end - 1); + mas_store(&mas, XA_ZERO_ENTRY); + } out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); @@ -790,8 +810,6 @@ fail_uprobe_end: uprobe_end_dup_mmap(); return retval; -fail_nomem_mas_store: - unlink_anon_vmas(tmp); fail_nomem_anon_vma_fork: mpol_put(vma_policy(tmp)); fail_nomem_policy: diff --git a/mm/memory.c b/mm/memory.c index 7f29d9394bdf..5e161551a5d1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -411,6 +411,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, * be 0. This will underflow and is okay. */ next = mas_find(&mas, ceiling - 1); + if (unlikely(xa_is_zero(next))) + next = NULL; /* * Hide vma from rmap and truncate_pagecache before freeing @@ -432,6 +434,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, && !is_vm_hugetlb_page(next)) { vma = next; next = mas_find(&mas, ceiling - 1); + if (unlikely(xa_is_zero(next))) + next = NULL; if (mm_wr_locked) vma_start_write(vma); unlink_anon_vmas(vma); @@ -1736,7 +1740,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, do { unmap_single_vma(tlb, vma, start_addr, end_addr, &details, mm_wr_locked); - } while ((vma = mas_find(&mas, end_t - 1)) != NULL); + vma = mas_find(&mas, end_t - 1); + } while (vma && likely(!xa_is_zero(vma))); mmu_notifier_invalidate_range_end(&range); } diff --git a/mm/mmap.c b/mm/mmap.c index bd2140cfcf36..d5c48b243869 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3303,10 +3303,11 @@ void exit_mmap(struct mm_struct *mm) arch_exit_mmap(mm); vma = mas_find(&mas, ULONG_MAX); - if (!vma) { + if (!vma || unlikely(xa_is_zero(vma))) { /* Can happen if dup_mmap() received an OOM */ mmap_read_unlock(mm); - return; + mmap_write_lock(mm); + goto destroy; } lru_add_drain(); @@ -3339,11 +3340,13 @@ void exit_mmap(struct mm_struct *mm) remove_vma(vma, true); count++; cond_resched(); - } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL); + vma = mas_find(&mas, ULONG_MAX); + } while (vma && likely(!xa_is_zero(vma))); BUG_ON(count != mm->map_count); trace_exit_mmap(mm); +destroy: __mt_destroy(&mm->mm_mt); mmap_write_unlock(mm); vm_unacct_memory(nr_accounted);