mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-05 18:41:58 +09:00
BACKPORT: userfaultfd: protect mmap_changing with rw_sem in userfaultfd_ctx
Increments and loads to mmap_changing are always in mmap_lock critical section. This ensures that if userspace requests event notification for non-cooperative operations (e.g. mremap), userfaultfd operations don't occur concurrently. This can be achieved by using a separate read-write semaphore in userfaultfd_ctx such that increments are done in write-mode and loads in read-mode, thereby eliminating the dependency on mmap_lock for this purpose. This is a preparatory step before we replace mmap_lock usage with per-vma locks in fill/move ioctls. Link: https://lkml.kernel.org/r/20240215182756.3448972-3-lokeshgidra@google.com Signed-off-by: Lokesh Gidra <lokeshgidra@google.com> Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: Brian Geffon <bgeffon@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: Jann Horn <jannh@google.com> Cc: Kalesh Singh <kaleshsingh@google.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Nicolas Geoffray <ngeoffray@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Tim Murray <timmurray@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> (cherry picked from commit 5e4c24a57b0c126686534b5b159a406c5dd02400) Conflicts: fs/userfaultfd.c include/linux/userfaultfd_k.h mm/userfaultfd.c 1. Functions passing control from fs/userfaultfd.c to mm/userfaultfd.c were renamed after 6.1. a. Replace mfill_atomic_copy() with mcopy_atomic() b. Replace mfill_atomic_zeropage() with mfill_zeropage() c. Replace mfill_atomic_continue() with mcopy_continue() d. Replace mfill_atomic() with __mcopy_atomic() e. Replace mfill_atomic_hugetlb() with __mcopy_atomic_hugetlb() 2. uffd flags were unified into a single parameter after 6.1. Replace 'flags' with 'mcopy_mode' and 'mode'. 3. 
Fetch dst_mm from dst_vma in __mcopy_atomic_hugetlb(). Bug: 320478828 Change-Id: I77615c36a0c891801c9eb9de3609df4e7f125c39 Signed-off-by: Lokesh Gidra <lokeshgidra@google.com>
This commit is contained in:
@@ -650,12 +650,15 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
|
||||
ctx->flags = octx->flags;
|
||||
ctx->features = octx->features;
|
||||
ctx->released = false;
|
||||
init_rwsem(&ctx->map_changing_lock);
|
||||
atomic_set(&ctx->mmap_changing, 0);
|
||||
ctx->mm = vma->vm_mm;
|
||||
mmgrab(ctx->mm);
|
||||
|
||||
userfaultfd_ctx_get(octx);
|
||||
down_write(&octx->map_changing_lock);
|
||||
atomic_inc(&octx->mmap_changing);
|
||||
up_write(&octx->map_changing_lock);
|
||||
fctx->orig = octx;
|
||||
fctx->new = ctx;
|
||||
list_add_tail(&fctx->list, fcs);
|
||||
@@ -702,7 +705,9 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
|
||||
if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
|
||||
vm_ctx->ctx = ctx;
|
||||
userfaultfd_ctx_get(ctx);
|
||||
down_write(&ctx->map_changing_lock);
|
||||
atomic_inc(&ctx->mmap_changing);
|
||||
up_write(&ctx->map_changing_lock);
|
||||
} else {
|
||||
/* Drop uffd context if remap feature not enabled */
|
||||
vma_start_write(vma);
|
||||
@@ -748,7 +753,9 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
|
||||
return true;
|
||||
|
||||
userfaultfd_ctx_get(ctx);
|
||||
down_write(&ctx->map_changing_lock);
|
||||
atomic_inc(&ctx->mmap_changing);
|
||||
up_write(&ctx->map_changing_lock);
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
msg_init(&ewq.msg);
|
||||
@@ -790,7 +797,9 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
|
||||
return -ENOMEM;
|
||||
|
||||
userfaultfd_ctx_get(ctx);
|
||||
down_write(&ctx->map_changing_lock);
|
||||
atomic_inc(&ctx->mmap_changing);
|
||||
up_write(&ctx->map_changing_lock);
|
||||
unmap_ctx->ctx = ctx;
|
||||
unmap_ctx->start = start;
|
||||
unmap_ctx->end = end;
|
||||
@@ -1708,9 +1717,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
|
||||
if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
|
||||
goto out;
|
||||
if (mmget_not_zero(ctx->mm)) {
|
||||
ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
|
||||
uffdio_copy.len, &ctx->mmap_changing,
|
||||
uffdio_copy.mode);
|
||||
ret = mcopy_atomic(ctx, uffdio_copy.dst, uffdio_copy.src,
|
||||
uffdio_copy.len, uffdio_copy.mode);
|
||||
mmput(ctx->mm);
|
||||
} else {
|
||||
return -ESRCH;
|
||||
@@ -1760,9 +1768,8 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
|
||||
goto out;
|
||||
|
||||
if (mmget_not_zero(ctx->mm)) {
|
||||
ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
|
||||
uffdio_zeropage.range.len,
|
||||
&ctx->mmap_changing);
|
||||
ret = mfill_zeropage(ctx, uffdio_zeropage.range.start,
|
||||
uffdio_zeropage.range.len);
|
||||
mmput(ctx->mm);
|
||||
} else {
|
||||
return -ESRCH;
|
||||
@@ -1817,9 +1824,8 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
|
||||
return -EINVAL;
|
||||
|
||||
if (mmget_not_zero(ctx->mm)) {
|
||||
ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
|
||||
uffdio_wp.range.len, mode_wp,
|
||||
&ctx->mmap_changing);
|
||||
ret = mwriteprotect_range(ctx, uffdio_wp.range.start,
|
||||
uffdio_wp.range.len, mode_wp);
|
||||
mmput(ctx->mm);
|
||||
} else {
|
||||
return -ESRCH;
|
||||
@@ -1870,9 +1876,8 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
|
||||
goto out;
|
||||
|
||||
if (mmget_not_zero(ctx->mm)) {
|
||||
ret = mcopy_continue(ctx->mm, uffdio_continue.range.start,
|
||||
uffdio_continue.range.len,
|
||||
&ctx->mmap_changing);
|
||||
ret = mcopy_continue(ctx, uffdio_continue.range.start,
|
||||
uffdio_continue.range.len);
|
||||
mmput(ctx->mm);
|
||||
} else {
|
||||
return -ESRCH;
|
||||
@@ -1943,13 +1948,14 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx,
|
||||
if (mmget_not_zero(mm)) {
|
||||
mmap_read_lock(mm);
|
||||
|
||||
/* Re-check after taking mmap_lock */
|
||||
/* Re-check after taking map_changing_lock */
|
||||
down_read(&ctx->map_changing_lock);
|
||||
if (likely(!atomic_read(&ctx->mmap_changing)))
|
||||
ret = move_pages(ctx, mm, uffdio_move.dst, uffdio_move.src,
|
||||
uffdio_move.len, uffdio_move.mode);
|
||||
else
|
||||
ret = -EAGAIN;
|
||||
|
||||
up_read(&ctx->map_changing_lock);
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
} else {
|
||||
@@ -2146,6 +2152,7 @@ static int new_userfaultfd(int flags)
|
||||
ctx->flags = flags;
|
||||
ctx->features = 0;
|
||||
ctx->released = false;
|
||||
init_rwsem(&ctx->map_changing_lock);
|
||||
atomic_set(&ctx->mmap_changing, 0);
|
||||
ctx->mm = current->mm;
|
||||
/* prevent the mm struct to be freed */
|
||||
|
||||
@@ -69,6 +69,13 @@ struct userfaultfd_ctx {
|
||||
unsigned int features;
|
||||
/* released */
|
||||
bool released;
|
||||
/*
|
||||
* Prevents userfaultfd operations (fill/move/wp) from happening while
|
||||
* some non-cooperative event(s) are taking place. Increments are done
|
||||
* in write-mode, whereas userfaultfd operations, which include
|
||||
* reading mmap_changing, are done in read-mode.
|
||||
*/
|
||||
struct rw_semaphore map_changing_lock;
|
||||
/* memory mappings are changing because of non-cooperative event */
|
||||
atomic_t mmap_changing;
|
||||
/* mm with one or more vmas attached to this userfaultfd_ctx */
|
||||
@@ -100,18 +107,14 @@ extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
|
||||
unsigned long dst_addr, struct page *page,
|
||||
bool newly_allocated, bool wp_copy);
|
||||
|
||||
extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
|
||||
unsigned long src_start, unsigned long len,
|
||||
atomic_t *mmap_changing, __u64 mode);
|
||||
extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
|
||||
unsigned long dst_start,
|
||||
unsigned long len,
|
||||
atomic_t *mmap_changing);
|
||||
extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
|
||||
unsigned long len, atomic_t *mmap_changing);
|
||||
extern int mwriteprotect_range(struct mm_struct *dst_mm,
|
||||
unsigned long start, unsigned long len,
|
||||
bool enable_wp, atomic_t *mmap_changing);
|
||||
extern ssize_t mcopy_atomic(struct userfaultfd_ctx *ctx, unsigned long dst_start,
|
||||
unsigned long src_start, unsigned long len, __u64 mode);
|
||||
extern ssize_t mfill_zeropage(struct userfaultfd_ctx *ctx,
|
||||
unsigned long dst_start, unsigned long len);
|
||||
extern ssize_t mcopy_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start,
|
||||
unsigned long len);
|
||||
extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
|
||||
unsigned long len, bool enable_wp);
|
||||
extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long len, bool enable_wp);
|
||||
|
||||
|
||||
@@ -322,15 +322,16 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
|
||||
* __mcopy_atomic processing for HUGETLB vmas. Note that this routine is
|
||||
* called with mmap_lock held, it will release mmap_lock before returning.
|
||||
*/
|
||||
static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
|
||||
static __always_inline ssize_t __mcopy_atomic_hugetlb(
|
||||
struct userfaultfd_ctx *ctx,
|
||||
struct vm_area_struct *dst_vma,
|
||||
unsigned long dst_start,
|
||||
unsigned long src_start,
|
||||
unsigned long len,
|
||||
enum mcopy_atomic_mode mode,
|
||||
bool wp_copy,
|
||||
atomic_t *mmap_changing)
|
||||
bool wp_copy)
|
||||
{
|
||||
struct mm_struct *dst_mm = dst_vma->vm_mm;
|
||||
int vm_shared = dst_vma->vm_flags & VM_SHARED;
|
||||
ssize_t err;
|
||||
pte_t *dst_pte;
|
||||
@@ -349,6 +350,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
|
||||
* feature is not supported.
|
||||
*/
|
||||
if (mode == MCOPY_ATOMIC_ZEROPAGE) {
|
||||
up_read(&ctx->map_changing_lock);
|
||||
mmap_read_unlock(dst_mm);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -434,6 +436,7 @@ retry:
|
||||
cond_resched();
|
||||
|
||||
if (unlikely(err == -ENOENT)) {
|
||||
up_read(&ctx->map_changing_lock);
|
||||
mmap_read_unlock(dst_mm);
|
||||
BUG_ON(!page);
|
||||
|
||||
@@ -446,12 +449,13 @@ retry:
|
||||
goto out;
|
||||
}
|
||||
mmap_read_lock(dst_mm);
|
||||
down_read(&ctx->map_changing_lock);
|
||||
/*
|
||||
* If memory mappings are changing because of non-cooperative
|
||||
* operation (e.g. mremap) running in parallel, bail out and
|
||||
* request the user to retry later
|
||||
*/
|
||||
if (mmap_changing && atomic_read(mmap_changing)) {
|
||||
if (atomic_read(&ctx->mmap_changing)) {
|
||||
err = -EAGAIN;
|
||||
break;
|
||||
}
|
||||
@@ -474,6 +478,7 @@ retry:
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
up_read(&ctx->map_changing_lock);
|
||||
mmap_read_unlock(dst_mm);
|
||||
out:
|
||||
if (page)
|
||||
@@ -485,14 +490,13 @@ out:
|
||||
}
|
||||
#else /* !CONFIG_HUGETLB_PAGE */
|
||||
/* fail at build time if gcc attempts to use this */
|
||||
extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
|
||||
extern ssize_t __mcopy_atomic_hugetlb(struct userfaultfd_ctx *ctx,
|
||||
struct vm_area_struct *dst_vma,
|
||||
unsigned long dst_start,
|
||||
unsigned long src_start,
|
||||
unsigned long len,
|
||||
enum mcopy_atomic_mode mode,
|
||||
bool wp_copy,
|
||||
atomic_t *mmap_changing);
|
||||
bool wp_copy);
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
|
||||
@@ -539,14 +543,14 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
|
||||
return err;
|
||||
}
|
||||
|
||||
static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
|
||||
static __always_inline ssize_t __mcopy_atomic(struct userfaultfd_ctx *ctx,
|
||||
unsigned long dst_start,
|
||||
unsigned long src_start,
|
||||
unsigned long len,
|
||||
enum mcopy_atomic_mode mcopy_mode,
|
||||
atomic_t *mmap_changing,
|
||||
__u64 mode)
|
||||
{
|
||||
struct mm_struct *dst_mm = ctx->mm;
|
||||
struct vm_area_struct *dst_vma;
|
||||
ssize_t err;
|
||||
pmd_t *dst_pmd;
|
||||
@@ -577,8 +581,9 @@ retry:
|
||||
* operation (e.g. mremap) running in parallel, bail out and
|
||||
* request the user to retry later
|
||||
*/
|
||||
down_read(&ctx->map_changing_lock);
|
||||
err = -EAGAIN;
|
||||
if (mmap_changing && atomic_read(mmap_changing))
|
||||
if (atomic_read(&ctx->mmap_changing))
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
@@ -611,9 +616,9 @@ retry:
|
||||
* If this is a HUGETLB vma, pass off to appropriate routine
|
||||
*/
|
||||
if (is_vm_hugetlb_page(dst_vma))
|
||||
return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
|
||||
return __mcopy_atomic_hugetlb(ctx, dst_vma, dst_start,
|
||||
src_start, len, mcopy_mode,
|
||||
wp_copy, mmap_changing);
|
||||
wp_copy);
|
||||
|
||||
if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
|
||||
goto out_unlock;
|
||||
@@ -671,6 +676,7 @@ retry:
|
||||
if (unlikely(err == -ENOENT)) {
|
||||
void *page_kaddr;
|
||||
|
||||
up_read(&ctx->map_changing_lock);
|
||||
mmap_read_unlock(dst_mm);
|
||||
BUG_ON(!page);
|
||||
|
||||
@@ -701,6 +707,7 @@ retry:
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
up_read(&ctx->map_changing_lock);
|
||||
mmap_read_unlock(dst_mm);
|
||||
out:
|
||||
if (page)
|
||||
@@ -711,26 +718,23 @@ out:
|
||||
return copied ? copied : err;
|
||||
}
|
||||
|
||||
ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
|
||||
unsigned long src_start, unsigned long len,
|
||||
atomic_t *mmap_changing, __u64 mode)
|
||||
ssize_t mcopy_atomic(struct userfaultfd_ctx *ctx, unsigned long dst_start,
|
||||
unsigned long src_start, unsigned long len, __u64 mode)
|
||||
{
|
||||
return __mcopy_atomic(dst_mm, dst_start, src_start, len,
|
||||
MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
|
||||
return __mcopy_atomic(ctx, dst_start, src_start, len,
|
||||
MCOPY_ATOMIC_NORMAL, mode);
|
||||
}
|
||||
|
||||
ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
|
||||
unsigned long len, atomic_t *mmap_changing)
|
||||
ssize_t mfill_zeropage(struct userfaultfd_ctx *ctx, unsigned long start,
|
||||
unsigned long len)
|
||||
{
|
||||
return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
|
||||
mmap_changing, 0);
|
||||
return __mcopy_atomic(ctx, start, 0, len, MCOPY_ATOMIC_ZEROPAGE, 0);
|
||||
}
|
||||
|
||||
ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
|
||||
unsigned long len, atomic_t *mmap_changing)
|
||||
ssize_t mcopy_continue(struct userfaultfd_ctx *ctx, unsigned long start,
|
||||
unsigned long len)
|
||||
{
|
||||
return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
|
||||
mmap_changing, 0);
|
||||
return __mcopy_atomic(ctx, start, 0, len, MCOPY_ATOMIC_CONTINUE, 0);
|
||||
}
|
||||
|
||||
void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
|
||||
@@ -750,10 +754,10 @@ void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
|
||||
tlb_finish_mmu(&tlb);
|
||||
}
|
||||
|
||||
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
|
||||
unsigned long len, bool enable_wp,
|
||||
atomic_t *mmap_changing)
|
||||
int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
|
||||
unsigned long len, bool enable_wp)
|
||||
{
|
||||
struct mm_struct *dst_mm = ctx->mm;
|
||||
struct vm_area_struct *dst_vma;
|
||||
unsigned long page_mask;
|
||||
int err;
|
||||
@@ -774,8 +778,9 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
|
||||
* operation (e.g. mremap) running in parallel, bail out and
|
||||
* request the user to retry later
|
||||
*/
|
||||
down_read(&ctx->map_changing_lock);
|
||||
err = -EAGAIN;
|
||||
if (mmap_changing && atomic_read(mmap_changing))
|
||||
if (atomic_read(&ctx->mmap_changing))
|
||||
goto out_unlock;
|
||||
|
||||
err = -ENOENT;
|
||||
@@ -799,6 +804,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
|
||||
|
||||
err = 0;
|
||||
out_unlock:
|
||||
up_read(&ctx->map_changing_lock);
|
||||
mmap_read_unlock(dst_mm);
|
||||
return err;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user