mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-07 03:15:31 +09:00
Merge ba535c1caf ("mm/oom_kill: allow process_mrelease to run under mmap_lock protection") into android-mainline
Steps on the way to 5.17-rc1 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I29d376595033f4ef7895319d9398c2cb415de883
This commit is contained in:
@@ -426,12 +426,14 @@ with the memory region, as the case would be with BSS (uninitialized data).
|
||||
The "pathname" shows the name of the file associated with this mapping. If the mapping
|
||||
is not associated with a file:
|
||||
|
||||
======= ====================================
|
||||
============= ====================================
|
||||
[heap] the heap of the program
|
||||
[stack] the stack of the main process
|
||||
[vdso] the "virtual dynamic shared object",
|
||||
the kernel system call handler
|
||||
======= ====================================
|
||||
[anon:<name>] an anonymous mapping that has been
|
||||
named by userspace
|
||||
============= ====================================
|
||||
|
||||
or if empty, the mapping is anonymous.
|
||||
|
||||
|
||||
@@ -753,7 +753,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
|
||||
return true;
|
||||
|
||||
if ((pte_flags(a) & _PAGE_PROTNONE) &&
|
||||
mm_tlb_flush_pending(mm))
|
||||
atomic_read(&mm->tlb_flush_pending))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/vmacache.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/huge_mm.h>
|
||||
#include <linux/mount.h>
|
||||
@@ -308,6 +309,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
|
||||
|
||||
name = arch_vma_name(vma);
|
||||
if (!name) {
|
||||
const char *anon_name;
|
||||
|
||||
if (!mm) {
|
||||
name = "[vdso]";
|
||||
goto done;
|
||||
@@ -319,8 +322,16 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (is_stack(vma))
|
||||
if (is_stack(vma)) {
|
||||
name = "[stack]";
|
||||
goto done;
|
||||
}
|
||||
|
||||
anon_name = vma_anon_name(vma);
|
||||
if (anon_name) {
|
||||
seq_pad(m, ' ');
|
||||
seq_printf(m, "[anon:%s]", anon_name);
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/poll.h>
|
||||
#include <linux/slab.h>
|
||||
@@ -877,7 +878,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
|
||||
new_flags, vma->anon_vma,
|
||||
vma->vm_file, vma->vm_pgoff,
|
||||
vma_policy(vma),
|
||||
NULL_VM_UFFD_CTX);
|
||||
NULL_VM_UFFD_CTX, vma_anon_name(vma));
|
||||
if (prev)
|
||||
vma = prev;
|
||||
else
|
||||
@@ -1436,7 +1437,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
|
||||
prev = vma_merge(mm, prev, start, vma_end, new_flags,
|
||||
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
|
||||
vma_policy(vma),
|
||||
((struct vm_userfaultfd_ctx){ ctx }));
|
||||
((struct vm_userfaultfd_ctx){ ctx }),
|
||||
vma_anon_name(vma));
|
||||
if (prev) {
|
||||
vma = prev;
|
||||
goto next;
|
||||
@@ -1613,7 +1615,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
|
||||
prev = vma_merge(mm, prev, start, vma_end, new_flags,
|
||||
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
|
||||
vma_policy(vma),
|
||||
NULL_VM_UFFD_CTX);
|
||||
NULL_VM_UFFD_CTX, vma_anon_name(vma));
|
||||
if (prev) {
|
||||
vma = prev;
|
||||
goto next;
|
||||
|
||||
@@ -424,51 +424,6 @@ extern unsigned int kobjsize(const void *objp);
|
||||
*/
|
||||
extern pgprot_t protection_map[16];
|
||||
|
||||
/**
|
||||
* enum fault_flag - Fault flag definitions.
|
||||
* @FAULT_FLAG_WRITE: Fault was a write fault.
|
||||
* @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
|
||||
* @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
|
||||
* @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
|
||||
* @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
|
||||
* @FAULT_FLAG_TRIED: The fault has been tried once.
|
||||
* @FAULT_FLAG_USER: The fault originated in userspace.
|
||||
* @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
|
||||
* @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
|
||||
* @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
|
||||
*
|
||||
* About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
|
||||
* whether we would allow page faults to retry by specifying these two
|
||||
* fault flags correctly. Currently there can be three legal combinations:
|
||||
*
|
||||
* (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and
|
||||
* this is the first try
|
||||
*
|
||||
* (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and
|
||||
* we've already tried at least once
|
||||
*
|
||||
* (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
|
||||
*
|
||||
* The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
|
||||
* be used. Note that page faults can be allowed to retry for multiple times,
|
||||
* in which case we'll have an initial fault with flags (a) then later on
|
||||
* continuous faults with flags (b). We should always try to detect pending
|
||||
* signals before a retry to make sure the continuous page faults can still be
|
||||
* interrupted if necessary.
|
||||
*/
|
||||
enum fault_flag {
|
||||
FAULT_FLAG_WRITE = 1 << 0,
|
||||
FAULT_FLAG_MKWRITE = 1 << 1,
|
||||
FAULT_FLAG_ALLOW_RETRY = 1 << 2,
|
||||
FAULT_FLAG_RETRY_NOWAIT = 1 << 3,
|
||||
FAULT_FLAG_KILLABLE = 1 << 4,
|
||||
FAULT_FLAG_TRIED = 1 << 5,
|
||||
FAULT_FLAG_USER = 1 << 6,
|
||||
FAULT_FLAG_REMOTE = 1 << 7,
|
||||
FAULT_FLAG_INSTRUCTION = 1 << 8,
|
||||
FAULT_FLAG_INTERRUPTIBLE = 1 << 9,
|
||||
};
|
||||
|
||||
/*
|
||||
* The default fault flags that should be used by most of the
|
||||
* arch-specific page fault handlers.
|
||||
@@ -577,6 +532,10 @@ enum page_entry_size {
|
||||
*/
|
||||
struct vm_operations_struct {
|
||||
void (*open)(struct vm_area_struct * area);
|
||||
/**
|
||||
* @close: Called when the VMA is being removed from the MM.
|
||||
* Context: User context. May sleep. Caller holds mmap_lock.
|
||||
*/
|
||||
void (*close)(struct vm_area_struct * area);
|
||||
/* Called any time before splitting to check if it's allowed */
|
||||
int (*may_split)(struct vm_area_struct *area, unsigned long addr);
|
||||
@@ -2644,7 +2603,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
||||
extern struct vm_area_struct *vma_merge(struct mm_struct *,
|
||||
struct vm_area_struct *prev, unsigned long addr, unsigned long end,
|
||||
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
|
||||
struct mempolicy *, struct vm_userfaultfd_ctx);
|
||||
struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
|
||||
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
|
||||
extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
|
||||
unsigned long addr, int new_below);
|
||||
@@ -3390,5 +3349,16 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ANON_VMA_NAME
|
||||
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long len_in, const char *name);
|
||||
#else
|
||||
static inline int
|
||||
madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long len_in, const char *name) {
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* _LINUX_MM_H */
|
||||
|
||||
@@ -2,8 +2,10 @@
|
||||
#ifndef LINUX_MM_INLINE_H
|
||||
#define LINUX_MM_INLINE_H
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/huge_mm.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
/**
|
||||
* folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
|
||||
@@ -135,4 +137,138 @@ static __always_inline void del_page_from_lru_list(struct page *page,
|
||||
{
|
||||
lruvec_del_folio(lruvec, page_folio(page));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ANON_VMA_NAME
|
||||
/*
|
||||
* mmap_lock should be read-locked when calling vma_anon_name() and while using
|
||||
* the returned pointer.
|
||||
*/
|
||||
extern const char *vma_anon_name(struct vm_area_struct *vma);
|
||||
|
||||
/*
|
||||
* mmap_lock should be read-locked for orig_vma->vm_mm.
|
||||
* mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
|
||||
* isolated.
|
||||
*/
|
||||
extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
|
||||
struct vm_area_struct *new_vma);
|
||||
|
||||
/*
|
||||
* mmap_lock should be write-locked or vma should have been isolated under
|
||||
* write-locked mmap_lock protection.
|
||||
*/
|
||||
extern void free_vma_anon_name(struct vm_area_struct *vma);
|
||||
|
||||
/* mmap_lock should be read-locked */
|
||||
static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
|
||||
const char *name)
|
||||
{
|
||||
const char *vma_name = vma_anon_name(vma);
|
||||
|
||||
/* either both NULL, or pointers to same string */
|
||||
if (vma_name == name)
|
||||
return true;
|
||||
|
||||
return name && vma_name && !strcmp(name, vma_name);
|
||||
}
|
||||
#else /* CONFIG_ANON_VMA_NAME */
|
||||
static inline const char *vma_anon_name(struct vm_area_struct *vma)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
|
||||
struct vm_area_struct *new_vma) {}
|
||||
static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
|
||||
static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
|
||||
const char *name)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif /* CONFIG_ANON_VMA_NAME */
|
||||
|
||||
static inline void init_tlb_flush_pending(struct mm_struct *mm)
|
||||
{
|
||||
atomic_set(&mm->tlb_flush_pending, 0);
|
||||
}
|
||||
|
||||
static inline void inc_tlb_flush_pending(struct mm_struct *mm)
|
||||
{
|
||||
atomic_inc(&mm->tlb_flush_pending);
|
||||
/*
|
||||
* The only time this value is relevant is when there are indeed pages
|
||||
* to flush. And we'll only flush pages after changing them, which
|
||||
* requires the PTL.
|
||||
*
|
||||
* So the ordering here is:
|
||||
*
|
||||
* atomic_inc(&mm->tlb_flush_pending);
|
||||
* spin_lock(&ptl);
|
||||
* ...
|
||||
* set_pte_at();
|
||||
* spin_unlock(&ptl);
|
||||
*
|
||||
* spin_lock(&ptl)
|
||||
* mm_tlb_flush_pending();
|
||||
* ....
|
||||
* spin_unlock(&ptl);
|
||||
*
|
||||
* flush_tlb_range();
|
||||
* atomic_dec(&mm->tlb_flush_pending);
|
||||
*
|
||||
* Where the increment is constrained by the PTL unlock, it thus
|
||||
* ensures that the increment is visible if the PTE modification is
|
||||
* visible. After all, if there is no PTE modification, nobody cares
|
||||
* about TLB flushes either.
|
||||
*
|
||||
* This very much relies on users (mm_tlb_flush_pending() and
|
||||
* mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
|
||||
* therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
|
||||
* locks (PPC) the unlock of one doesn't order against the lock of
|
||||
* another PTL.
|
||||
*
|
||||
* The decrement is ordered by the flush_tlb_range(), such that
|
||||
* mm_tlb_flush_pending() will not return false unless all flushes have
|
||||
* completed.
|
||||
*/
|
||||
}
|
||||
|
||||
static inline void dec_tlb_flush_pending(struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* See inc_tlb_flush_pending().
|
||||
*
|
||||
* This cannot be smp_mb__before_atomic() because smp_mb() simply does
|
||||
* not order against TLB invalidate completion, which is what we need.
|
||||
*
|
||||
* Therefore we must rely on tlb_flush_*() to guarantee order.
|
||||
*/
|
||||
atomic_dec(&mm->tlb_flush_pending);
|
||||
}
|
||||
|
||||
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* Must be called after having acquired the PTL; orders against that
|
||||
* PTLs release and therefore ensures that if we observe the modified
|
||||
* PTE we must also observe the increment from inc_tlb_flush_pending().
|
||||
*
|
||||
* That is, it only guarantees to return true if there is a flush
|
||||
* pending for _this_ PTL.
|
||||
*/
|
||||
return atomic_read(&mm->tlb_flush_pending);
|
||||
}
|
||||
|
||||
static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* Similar to mm_tlb_flush_pending(), we must have acquired the PTL
|
||||
* for which there is a TLB flush pending in order to guarantee
|
||||
* we've seen both that PTE modification and the increment.
|
||||
*
|
||||
* (no requirement on actually still holding the PTL, that is irrelevant)
|
||||
*/
|
||||
return atomic_read(&mm->tlb_flush_pending) > 1;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <linux/mm_types_task.h>
|
||||
|
||||
#include <linux/auxvec.h>
|
||||
#include <linux/kref.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/rbtree.h>
|
||||
@@ -386,6 +387,12 @@ struct vm_userfaultfd_ctx {
|
||||
struct vm_userfaultfd_ctx {};
|
||||
#endif /* CONFIG_USERFAULTFD */
|
||||
|
||||
struct anon_vma_name {
|
||||
struct kref kref;
|
||||
/* The name needs to be at the end because it is dynamically sized. */
|
||||
char name[];
|
||||
};
|
||||
|
||||
/*
|
||||
* This struct describes a virtual memory area. There is one of these
|
||||
* per VM-area/task. A VM area is any part of the process virtual memory
|
||||
@@ -426,11 +433,19 @@ struct vm_area_struct {
|
||||
/*
|
||||
* For areas with an address space and backing store,
|
||||
* linkage into the address_space->i_mmap interval tree.
|
||||
*
|
||||
* For private anonymous mappings, a pointer to a null terminated string
|
||||
* containing the name given to the vma, or NULL if unnamed.
|
||||
*/
|
||||
struct {
|
||||
struct rb_node rb;
|
||||
unsigned long rb_subtree_last;
|
||||
} shared;
|
||||
|
||||
union {
|
||||
struct {
|
||||
struct rb_node rb;
|
||||
unsigned long rb_subtree_last;
|
||||
} shared;
|
||||
/* Serialized by mmap_lock. */
|
||||
struct anon_vma_name *anon_name;
|
||||
};
|
||||
|
||||
/*
|
||||
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
|
||||
@@ -677,90 +692,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
|
||||
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
|
||||
extern void tlb_finish_mmu(struct mmu_gather *tlb);
|
||||
|
||||
static inline void init_tlb_flush_pending(struct mm_struct *mm)
|
||||
{
|
||||
atomic_set(&mm->tlb_flush_pending, 0);
|
||||
}
|
||||
|
||||
static inline void inc_tlb_flush_pending(struct mm_struct *mm)
|
||||
{
|
||||
atomic_inc(&mm->tlb_flush_pending);
|
||||
/*
|
||||
* The only time this value is relevant is when there are indeed pages
|
||||
* to flush. And we'll only flush pages after changing them, which
|
||||
* requires the PTL.
|
||||
*
|
||||
* So the ordering here is:
|
||||
*
|
||||
* atomic_inc(&mm->tlb_flush_pending);
|
||||
* spin_lock(&ptl);
|
||||
* ...
|
||||
* set_pte_at();
|
||||
* spin_unlock(&ptl);
|
||||
*
|
||||
* spin_lock(&ptl)
|
||||
* mm_tlb_flush_pending();
|
||||
* ....
|
||||
* spin_unlock(&ptl);
|
||||
*
|
||||
* flush_tlb_range();
|
||||
* atomic_dec(&mm->tlb_flush_pending);
|
||||
*
|
||||
* Where the increment is constrained by the PTL unlock, it thus
|
||||
* ensures that the increment is visible if the PTE modification is
|
||||
* visible. After all, if there is no PTE modification, nobody cares
|
||||
* about TLB flushes either.
|
||||
*
|
||||
* This very much relies on users (mm_tlb_flush_pending() and
|
||||
* mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
|
||||
* therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
|
||||
* locks (PPC) the unlock of one doesn't order against the lock of
|
||||
* another PTL.
|
||||
*
|
||||
* The decrement is ordered by the flush_tlb_range(), such that
|
||||
* mm_tlb_flush_pending() will not return false unless all flushes have
|
||||
* completed.
|
||||
*/
|
||||
}
|
||||
|
||||
static inline void dec_tlb_flush_pending(struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* See inc_tlb_flush_pending().
|
||||
*
|
||||
* This cannot be smp_mb__before_atomic() because smp_mb() simply does
|
||||
* not order against TLB invalidate completion, which is what we need.
|
||||
*
|
||||
* Therefore we must rely on tlb_flush_*() to guarantee order.
|
||||
*/
|
||||
atomic_dec(&mm->tlb_flush_pending);
|
||||
}
|
||||
|
||||
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* Must be called after having acquired the PTL; orders against that
|
||||
* PTLs release and therefore ensures that if we observe the modified
|
||||
* PTE we must also observe the increment from inc_tlb_flush_pending().
|
||||
*
|
||||
* That is, it only guarantees to return true if there is a flush
|
||||
* pending for _this_ PTL.
|
||||
*/
|
||||
return atomic_read(&mm->tlb_flush_pending);
|
||||
}
|
||||
|
||||
static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* Similar to mm_tlb_flush_pending(), we must have acquired the PTL
|
||||
* for which there is a TLB flush pending in order to guarantee
|
||||
* we've seen both that PTE modification and the increment.
|
||||
*
|
||||
* (no requirement on actually still holding the PTL, that is irrelevant)
|
||||
*/
|
||||
return atomic_read(&mm->tlb_flush_pending) > 1;
|
||||
}
|
||||
|
||||
struct vm_fault;
|
||||
|
||||
/**
|
||||
@@ -875,4 +806,49 @@ typedef struct {
|
||||
unsigned long val;
|
||||
} swp_entry_t;
|
||||
|
||||
/**
|
||||
* enum fault_flag - Fault flag definitions.
|
||||
* @FAULT_FLAG_WRITE: Fault was a write fault.
|
||||
* @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
|
||||
* @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
|
||||
* @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
|
||||
* @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
|
||||
* @FAULT_FLAG_TRIED: The fault has been tried once.
|
||||
* @FAULT_FLAG_USER: The fault originated in userspace.
|
||||
* @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
|
||||
* @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
|
||||
* @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
|
||||
*
|
||||
* About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
|
||||
* whether we would allow page faults to retry by specifying these two
|
||||
* fault flags correctly. Currently there can be three legal combinations:
|
||||
*
|
||||
* (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and
|
||||
* this is the first try
|
||||
*
|
||||
* (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and
|
||||
* we've already tried at least once
|
||||
*
|
||||
* (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
|
||||
*
|
||||
* The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
|
||||
* be used. Note that page faults can be allowed to retry for multiple times,
|
||||
* in which case we'll have an initial fault with flags (a) then later on
|
||||
* continuous faults with flags (b). We should always try to detect pending
|
||||
* signals before a retry to make sure the continuous page faults can still be
|
||||
* interrupted if necessary.
|
||||
*/
|
||||
enum fault_flag {
|
||||
FAULT_FLAG_WRITE = 1 << 0,
|
||||
FAULT_FLAG_MKWRITE = 1 << 1,
|
||||
FAULT_FLAG_ALLOW_RETRY = 1 << 2,
|
||||
FAULT_FLAG_RETRY_NOWAIT = 1 << 3,
|
||||
FAULT_FLAG_KILLABLE = 1 << 4,
|
||||
FAULT_FLAG_TRIED = 1 << 5,
|
||||
FAULT_FLAG_USER = 1 << 6,
|
||||
FAULT_FLAG_REMOTE = 1 << 7,
|
||||
FAULT_FLAG_INSTRUCTION = 1 << 8,
|
||||
FAULT_FLAG_INTERRUPTIBLE = 1 << 9,
|
||||
};
|
||||
|
||||
#endif /* _LINUX_MM_TYPES_H */
|
||||
|
||||
@@ -272,4 +272,7 @@ struct prctl_mm_map {
|
||||
# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
|
||||
# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
|
||||
|
||||
#define PR_SET_VMA 0x53564d41
|
||||
# define PR_SET_VMA_ANON_NAME 0
|
||||
|
||||
#endif /* _LINUX_PRCTL_H */
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/vmacache.h>
|
||||
#include <linux/nsproxy.h>
|
||||
#include <linux/capability.h>
|
||||
@@ -371,12 +372,14 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
|
||||
*new = data_race(*orig);
|
||||
INIT_LIST_HEAD(&new->anon_vma_chain);
|
||||
new->vm_next = new->vm_prev = NULL;
|
||||
dup_vma_anon_name(orig, new);
|
||||
}
|
||||
return new;
|
||||
}
|
||||
|
||||
void vm_area_free(struct vm_area_struct *vma)
|
||||
{
|
||||
free_vma_anon_name(vma);
|
||||
kmem_cache_free(vm_area_cachep, vma);
|
||||
}
|
||||
|
||||
|
||||
63
kernel/sys.c
63
kernel/sys.c
@@ -2263,6 +2263,66 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
|
||||
|
||||
#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
|
||||
|
||||
#ifdef CONFIG_ANON_VMA_NAME
|
||||
|
||||
#define ANON_VMA_NAME_MAX_LEN 80
|
||||
#define ANON_VMA_NAME_INVALID_CHARS "\\`$[]"
|
||||
|
||||
static inline bool is_valid_name_char(char ch)
|
||||
{
|
||||
/* printable ascii characters, excluding ANON_VMA_NAME_INVALID_CHARS */
|
||||
return ch > 0x1f && ch < 0x7f &&
|
||||
!strchr(ANON_VMA_NAME_INVALID_CHARS, ch);
|
||||
}
|
||||
|
||||
static int prctl_set_vma(unsigned long opt, unsigned long addr,
|
||||
unsigned long size, unsigned long arg)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
const char __user *uname;
|
||||
char *name, *pch;
|
||||
int error;
|
||||
|
||||
switch (opt) {
|
||||
case PR_SET_VMA_ANON_NAME:
|
||||
uname = (const char __user *)arg;
|
||||
if (uname) {
|
||||
name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
|
||||
|
||||
if (IS_ERR(name))
|
||||
return PTR_ERR(name);
|
||||
|
||||
for (pch = name; *pch != '\0'; pch++) {
|
||||
if (!is_valid_name_char(*pch)) {
|
||||
kfree(name);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Reset the name */
|
||||
name = NULL;
|
||||
}
|
||||
|
||||
mmap_write_lock(mm);
|
||||
error = madvise_set_anon_name(mm, addr, size, name);
|
||||
mmap_write_unlock(mm);
|
||||
kfree(name);
|
||||
break;
|
||||
default:
|
||||
error = -EINVAL;
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
#else /* CONFIG_ANON_VMA_NAME */
|
||||
static int prctl_set_vma(unsigned long opt, unsigned long start,
|
||||
unsigned long size, unsigned long arg)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif /* CONFIG_ANON_VMA_NAME */
|
||||
|
||||
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
|
||||
unsigned long, arg4, unsigned long, arg5)
|
||||
{
|
||||
@@ -2532,6 +2592,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
|
||||
error = sched_core_share_pid(arg2, arg3, arg4, arg5);
|
||||
break;
|
||||
#endif
|
||||
case PR_SET_VMA:
|
||||
error = prctl_set_vma(arg2, arg3, arg4, arg5);
|
||||
break;
|
||||
default:
|
||||
error = -EINVAL;
|
||||
break;
|
||||
|
||||
14
mm/Kconfig
14
mm/Kconfig
@@ -900,6 +900,20 @@ config IO_MAPPING
|
||||
config SECRETMEM
|
||||
def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
|
||||
|
||||
config ANON_VMA_NAME
|
||||
bool "Anonymous VMA name support"
|
||||
depends on PROC_FS && ADVISE_SYSCALLS && MMU
|
||||
|
||||
help
|
||||
Allow naming anonymous virtual memory areas.
|
||||
|
||||
This feature allows assigning names to virtual memory areas. Assigned
|
||||
names can be later retrieved from /proc/pid/maps and /proc/pid/smaps
|
||||
and help identify individual anonymous memory areas.
|
||||
Assigning a name to an anonymous virtual memory area might prevent that
|
||||
area from being merged with adjacent virtual memory areas due to the
|
||||
difference in their name.
|
||||
|
||||
source "mm/damon/Kconfig"
|
||||
|
||||
endmenu
|
||||
|
||||
1
mm/ksm.c
1
mm/ksm.c
@@ -15,6 +15,7 @@
|
||||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
160
mm/madvise.c
160
mm/madvise.c
@@ -18,6 +18,8 @@
|
||||
#include <linux/fadvise.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/ksm.h>
|
||||
#include <linux/fs.h>
|
||||
@@ -62,19 +64,114 @@ static int madvise_need_mmap_write(int behavior)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ANON_VMA_NAME
|
||||
static struct anon_vma_name *anon_vma_name_alloc(const char *name)
|
||||
{
|
||||
struct anon_vma_name *anon_name;
|
||||
size_t count;
|
||||
|
||||
/* Add 1 for NUL terminator at the end of the anon_name->name */
|
||||
count = strlen(name) + 1;
|
||||
anon_name = kmalloc(struct_size(anon_name, name, count), GFP_KERNEL);
|
||||
if (anon_name) {
|
||||
kref_init(&anon_name->kref);
|
||||
memcpy(anon_name->name, name, count);
|
||||
}
|
||||
|
||||
return anon_name;
|
||||
}
|
||||
|
||||
static void vma_anon_name_free(struct kref *kref)
|
||||
{
|
||||
struct anon_vma_name *anon_name =
|
||||
container_of(kref, struct anon_vma_name, kref);
|
||||
kfree(anon_name);
|
||||
}
|
||||
|
||||
static inline bool has_vma_anon_name(struct vm_area_struct *vma)
|
||||
{
|
||||
return !vma->vm_file && vma->anon_name;
|
||||
}
|
||||
|
||||
const char *vma_anon_name(struct vm_area_struct *vma)
|
||||
{
|
||||
if (!has_vma_anon_name(vma))
|
||||
return NULL;
|
||||
|
||||
mmap_assert_locked(vma->vm_mm);
|
||||
|
||||
return vma->anon_name->name;
|
||||
}
|
||||
|
||||
void dup_vma_anon_name(struct vm_area_struct *orig_vma,
|
||||
struct vm_area_struct *new_vma)
|
||||
{
|
||||
if (!has_vma_anon_name(orig_vma))
|
||||
return;
|
||||
|
||||
kref_get(&orig_vma->anon_name->kref);
|
||||
new_vma->anon_name = orig_vma->anon_name;
|
||||
}
|
||||
|
||||
void free_vma_anon_name(struct vm_area_struct *vma)
|
||||
{
|
||||
struct anon_vma_name *anon_name;
|
||||
|
||||
if (!has_vma_anon_name(vma))
|
||||
return;
|
||||
|
||||
anon_name = vma->anon_name;
|
||||
vma->anon_name = NULL;
|
||||
kref_put(&anon_name->kref, vma_anon_name_free);
|
||||
}
|
||||
|
||||
/* mmap_lock should be write-locked */
|
||||
static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
|
||||
{
|
||||
const char *anon_name;
|
||||
|
||||
if (!name) {
|
||||
free_vma_anon_name(vma);
|
||||
return 0;
|
||||
}
|
||||
|
||||
anon_name = vma_anon_name(vma);
|
||||
if (anon_name) {
|
||||
/* Same name, nothing to do here */
|
||||
if (!strcmp(name, anon_name))
|
||||
return 0;
|
||||
|
||||
free_vma_anon_name(vma);
|
||||
}
|
||||
vma->anon_name = anon_vma_name_alloc(name);
|
||||
if (!vma->anon_name)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else /* CONFIG_ANON_VMA_NAME */
|
||||
static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
|
||||
{
|
||||
if (name)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_ANON_VMA_NAME */
|
||||
/*
|
||||
* Update the vm_flags on region of a vma, splitting it or merging it as
|
||||
* necessary. Must be called with mmap_lock held for writing.
|
||||
*/
|
||||
static int madvise_update_vma(struct vm_area_struct *vma,
|
||||
struct vm_area_struct **prev, unsigned long start,
|
||||
unsigned long end, unsigned long new_flags)
|
||||
unsigned long end, unsigned long new_flags,
|
||||
const char *name)
|
||||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
int error;
|
||||
pgoff_t pgoff;
|
||||
|
||||
if (new_flags == vma->vm_flags) {
|
||||
if (new_flags == vma->vm_flags && is_same_vma_anon_name(vma, name)) {
|
||||
*prev = vma;
|
||||
return 0;
|
||||
}
|
||||
@@ -82,7 +179,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
|
||||
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
|
||||
*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
|
||||
vma->vm_file, pgoff, vma_policy(vma),
|
||||
vma->vm_userfaultfd_ctx);
|
||||
vma->vm_userfaultfd_ctx, name);
|
||||
if (*prev) {
|
||||
vma = *prev;
|
||||
goto success;
|
||||
@@ -111,6 +208,11 @@ success:
|
||||
* vm_flags is protected by the mmap_lock held in write mode.
|
||||
*/
|
||||
vma->vm_flags = new_flags;
|
||||
if (!vma->vm_file) {
|
||||
error = replace_vma_anon_name(vma, name);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -938,7 +1040,8 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
|
||||
break;
|
||||
}
|
||||
|
||||
error = madvise_update_vma(vma, prev, start, end, new_flags);
|
||||
error = madvise_update_vma(vma, prev, start, end, new_flags,
|
||||
vma_anon_name(vma));
|
||||
|
||||
out:
|
||||
/*
|
||||
@@ -1118,6 +1221,55 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
|
||||
return unmapped_error;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ANON_VMA_NAME
|
||||
static int madvise_vma_anon_name(struct vm_area_struct *vma,
|
||||
struct vm_area_struct **prev,
|
||||
unsigned long start, unsigned long end,
|
||||
unsigned long name)
|
||||
{
|
||||
int error;
|
||||
|
||||
/* Only anonymous mappings can be named */
|
||||
if (vma->vm_file)
|
||||
return -EBADF;
|
||||
|
||||
error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
|
||||
(const char *)name);
|
||||
|
||||
/*
|
||||
* madvise() returns EAGAIN if kernel resources, such as
|
||||
* slab, are temporarily unavailable.
|
||||
*/
|
||||
if (error == -ENOMEM)
|
||||
error = -EAGAIN;
|
||||
return error;
|
||||
}
|
||||
|
||||
int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long len_in, const char *name)
|
||||
{
|
||||
unsigned long end;
|
||||
unsigned long len;
|
||||
|
||||
if (start & ~PAGE_MASK)
|
||||
return -EINVAL;
|
||||
len = (len_in + ~PAGE_MASK) & PAGE_MASK;
|
||||
|
||||
/* Check to see whether len was rounded up from small -ve to zero */
|
||||
if (len_in && !len)
|
||||
return -EINVAL;
|
||||
|
||||
end = start + len;
|
||||
if (end < start)
|
||||
return -EINVAL;
|
||||
|
||||
if (end == start)
|
||||
return 0;
|
||||
|
||||
return madvise_walk_vmas(mm, start, end, (unsigned long)name,
|
||||
madvise_vma_anon_name);
|
||||
}
|
||||
#endif /* CONFIG_ANON_VMA_NAME */
|
||||
/*
|
||||
* The madvise(2) system call.
|
||||
*
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/sched/coredump.h>
|
||||
#include <linux/sched/numa_balancing.h>
|
||||
|
||||
@@ -810,7 +810,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
|
||||
((vmstart - vma->vm_start) >> PAGE_SHIFT);
|
||||
prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
|
||||
vma->anon_vma, vma->vm_file, pgoff,
|
||||
new_pol, vma->vm_userfaultfd_ctx);
|
||||
new_pol, vma->vm_userfaultfd_ctx,
|
||||
vma_anon_name(vma));
|
||||
if (prev) {
|
||||
vma = prev;
|
||||
next = vma->vm_next;
|
||||
|
||||
@@ -512,7 +512,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
|
||||
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
|
||||
*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
|
||||
vma->vm_file, pgoff, vma_policy(vma),
|
||||
vma->vm_userfaultfd_ctx);
|
||||
vma->vm_userfaultfd_ctx, vma_anon_name(vma));
|
||||
if (*prev) {
|
||||
vma = *prev;
|
||||
goto success;
|
||||
|
||||
55
mm/mmap.c
55
mm/mmap.c
@@ -13,6 +13,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/vmacache.h>
|
||||
#include <linux/shm.h>
|
||||
#include <linux/mman.h>
|
||||
@@ -1029,7 +1030,8 @@ again:
|
||||
*/
|
||||
static inline int is_mergeable_vma(struct vm_area_struct *vma,
|
||||
struct file *file, unsigned long vm_flags,
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
|
||||
const char *anon_name)
|
||||
{
|
||||
/*
|
||||
* VM_SOFTDIRTY should not prevent from VMA merging, if we
|
||||
@@ -1047,6 +1049,8 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
|
||||
return 0;
|
||||
if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
|
||||
return 0;
|
||||
if (!is_same_vma_anon_name(vma, anon_name))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -1079,9 +1083,10 @@ static int
|
||||
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
|
||||
struct anon_vma *anon_vma, struct file *file,
|
||||
pgoff_t vm_pgoff,
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
|
||||
const char *anon_name)
|
||||
{
|
||||
if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
|
||||
if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
|
||||
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
|
||||
if (vma->vm_pgoff == vm_pgoff)
|
||||
return 1;
|
||||
@@ -1100,9 +1105,10 @@ static int
|
||||
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
|
||||
struct anon_vma *anon_vma, struct file *file,
|
||||
pgoff_t vm_pgoff,
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
|
||||
const char *anon_name)
|
||||
{
|
||||
if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
|
||||
if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
|
||||
is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
|
||||
pgoff_t vm_pglen;
|
||||
vm_pglen = vma_pages(vma);
|
||||
@@ -1113,9 +1119,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
|
||||
* whether that can be merged with its predecessor or its successor.
|
||||
* Or both (it neatly fills a hole).
|
||||
* Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
|
||||
* figure out whether that can be merged with its predecessor or its
|
||||
* successor. Or both (it neatly fills a hole).
|
||||
*
|
||||
* In most cases - when called for mmap, brk or mremap - [addr,end) is
|
||||
* certain not to be mapped by the time vma_merge is called; but when
|
||||
@@ -1160,7 +1166,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
||||
unsigned long end, unsigned long vm_flags,
|
||||
struct anon_vma *anon_vma, struct file *file,
|
||||
pgoff_t pgoff, struct mempolicy *policy,
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
|
||||
struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
|
||||
const char *anon_name)
|
||||
{
|
||||
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
|
||||
struct vm_area_struct *area, *next;
|
||||
@@ -1190,7 +1197,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
||||
mpol_equal(vma_policy(prev), policy) &&
|
||||
can_vma_merge_after(prev, vm_flags,
|
||||
anon_vma, file, pgoff,
|
||||
vm_userfaultfd_ctx)) {
|
||||
vm_userfaultfd_ctx, anon_name)) {
|
||||
/*
|
||||
* OK, it can. Can we now merge in the successor as well?
|
||||
*/
|
||||
@@ -1199,7 +1206,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
||||
can_vma_merge_before(next, vm_flags,
|
||||
anon_vma, file,
|
||||
pgoff+pglen,
|
||||
vm_userfaultfd_ctx) &&
|
||||
vm_userfaultfd_ctx, anon_name) &&
|
||||
is_mergeable_anon_vma(prev->anon_vma,
|
||||
next->anon_vma, NULL)) {
|
||||
/* cases 1, 6 */
|
||||
@@ -1222,7 +1229,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
|
||||
mpol_equal(policy, vma_policy(next)) &&
|
||||
can_vma_merge_before(next, vm_flags,
|
||||
anon_vma, file, pgoff+pglen,
|
||||
vm_userfaultfd_ctx)) {
|
||||
vm_userfaultfd_ctx, anon_name)) {
|
||||
if (prev && addr < prev->vm_end) /* case 4 */
|
||||
err = __vma_adjust(prev, prev->vm_start,
|
||||
addr, prev->vm_pgoff, NULL, next);
|
||||
@@ -1754,7 +1761,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
* Can we just expand an old mapping?
|
||||
*/
|
||||
vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
|
||||
NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
|
||||
NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
|
||||
if (vma)
|
||||
goto out;
|
||||
|
||||
@@ -1803,7 +1810,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
|
||||
*/
|
||||
if (unlikely(vm_flags != vma->vm_flags && prev)) {
|
||||
merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
|
||||
NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
|
||||
NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
|
||||
if (merge) {
|
||||
/* ->mmap() can change vma->vm_file and fput the original file. So
|
||||
* fput the vma->vm_file here or we would add an extra fput for file
|
||||
@@ -3056,7 +3063,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
|
||||
|
||||
/* Can we just expand an old private anonymous mapping? */
|
||||
vma = vma_merge(mm, prev, addr, addr + len, flags,
|
||||
NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
|
||||
NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
|
||||
if (vma)
|
||||
goto out;
|
||||
|
||||
@@ -3142,25 +3149,27 @@ void exit_mmap(struct mm_struct *mm)
|
||||
* to mmu_notifier_release(mm) ensures mmu notifier callbacks in
|
||||
* __oom_reap_task_mm() will not block.
|
||||
*
|
||||
* This needs to be done before calling munlock_vma_pages_all(),
|
||||
* This needs to be done before calling unlock_range(),
|
||||
* which clears VM_LOCKED, otherwise the oom reaper cannot
|
||||
* reliably test it.
|
||||
*/
|
||||
(void)__oom_reap_task_mm(mm);
|
||||
|
||||
set_bit(MMF_OOM_SKIP, &mm->flags);
|
||||
mmap_write_lock(mm);
|
||||
mmap_write_unlock(mm);
|
||||
}
|
||||
|
||||
mmap_write_lock(mm);
|
||||
if (mm->locked_vm)
|
||||
unlock_range(mm->mmap, ULONG_MAX);
|
||||
|
||||
arch_exit_mmap(mm);
|
||||
|
||||
vma = mm->mmap;
|
||||
if (!vma) /* Can happen if dup_mmap() received an OOM */
|
||||
if (!vma) {
|
||||
/* Can happen if dup_mmap() received an OOM */
|
||||
mmap_write_unlock(mm);
|
||||
return;
|
||||
}
|
||||
|
||||
lru_add_drain();
|
||||
flush_cache_mm(mm);
|
||||
@@ -3171,16 +3180,14 @@ void exit_mmap(struct mm_struct *mm)
|
||||
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
|
||||
tlb_finish_mmu(&tlb);
|
||||
|
||||
/*
|
||||
* Walk the list again, actually closing and freeing it,
|
||||
* with preemption enabled, without holding any MM locks.
|
||||
*/
|
||||
/* Walk the list again, actually closing and freeing it. */
|
||||
while (vma) {
|
||||
if (vma->vm_flags & VM_ACCOUNT)
|
||||
nr_accounted += vma_pages(vma);
|
||||
vma = remove_vma(vma);
|
||||
cond_resched();
|
||||
}
|
||||
mmap_write_unlock(mm);
|
||||
vm_unacct_memory(nr_accounted);
|
||||
}
|
||||
|
||||
@@ -3249,7 +3256,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
|
||||
return NULL; /* should never get here */
|
||||
new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
|
||||
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
|
||||
vma->vm_userfaultfd_ctx);
|
||||
vma->vm_userfaultfd_ctx, vma_anon_name(vma));
|
||||
if (new_vma) {
|
||||
/*
|
||||
* Source vma may have been merged into new_vma
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mmdebug.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
@@ -464,7 +464,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
|
||||
pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
|
||||
*pprev = vma_merge(mm, *pprev, start, end, newflags,
|
||||
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
|
||||
vma->vm_userfaultfd_ctx);
|
||||
vma->vm_userfaultfd_ctx, vma_anon_name(vma));
|
||||
if (*pprev) {
|
||||
vma = *pprev;
|
||||
VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
|
||||
|
||||
@@ -1170,15 +1170,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
|
||||
goto put_task;
|
||||
}
|
||||
|
||||
if (mmget_not_zero(p->mm)) {
|
||||
mm = p->mm;
|
||||
if (task_will_free_mem(p))
|
||||
reap = true;
|
||||
else {
|
||||
/* Error only if the work has not been done already */
|
||||
if (!test_bit(MMF_OOM_SKIP, &mm->flags))
|
||||
ret = -EINVAL;
|
||||
}
|
||||
mm = p->mm;
|
||||
mmgrab(mm);
|
||||
|
||||
if (task_will_free_mem(p))
|
||||
reap = true;
|
||||
else {
|
||||
/* Error only if the work has not been done already */
|
||||
if (!test_bit(MMF_OOM_SKIP, &mm->flags))
|
||||
ret = -EINVAL;
|
||||
}
|
||||
task_unlock(p);
|
||||
|
||||
@@ -1189,13 +1189,16 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
|
||||
ret = -EINTR;
|
||||
goto drop_mm;
|
||||
}
|
||||
if (!__oom_reap_task_mm(mm))
|
||||
/*
|
||||
* Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure
|
||||
* possible change in exit_mmap is seen
|
||||
*/
|
||||
if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm))
|
||||
ret = -EAGAIN;
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
drop_mm:
|
||||
if (mm)
|
||||
mmput(mm);
|
||||
mmdrop(mm);
|
||||
put_task:
|
||||
put_task_struct(task);
|
||||
return ret;
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <asm/tlb.h>
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user