mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-06 10:58:48 +09:00
Merge 0eb68437a7 ("vmalloc: choose a better start address in vm_area_register_early()") into android-mainline
Steps on the way to 5.16-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ia521da34dd6bf9f4c3e3927228f34230a986d901
This commit is contained in:
@@ -87,10 +87,8 @@ Brief summary of control files.
|
||||
memory.oom_control set/show oom controls.
|
||||
memory.numa_stat show the memory usage per numa
|
||||
node
|
||||
memory.kmem.limit_in_bytes set/show hard limit for kernel memory
|
||||
This knob is deprecated and shouldn't be
|
||||
used. It is planned that this be removed in
|
||||
the foreseeable future.
|
||||
memory.kmem.limit_in_bytes This knob is deprecated and writing to
|
||||
it will return -ENOTSUPP.
|
||||
memory.kmem.usage_in_bytes show current kernel memory allocation
|
||||
memory.kmem.failcnt show the number of times kernel memory usage
|
||||
hits limits
|
||||
@@ -518,11 +516,6 @@ will be charged as a new owner of it.
|
||||
charged file caches. Some out-of-use page caches may keep charged until
|
||||
memory pressure happens. If you want to avoid that, force_empty will be useful.
|
||||
|
||||
Also, note that when memory.kmem.limit_in_bytes is set the charges due to
|
||||
kernel pages will still be seen. This is not considered a failure and the
|
||||
write will still return success. In this case, it is expected that
|
||||
memory.kmem.usage_in_bytes == memory.usage_in_bytes.
|
||||
|
||||
5.2 stat file
|
||||
-------------
|
||||
|
||||
|
||||
@@ -2409,6 +2409,7 @@ static void __exit cleanup_mtd(void)
|
||||
if (proc_mtd)
|
||||
remove_proc_entry("mtd", NULL);
|
||||
class_unregister(&mtd_class);
|
||||
bdi_unregister(mtd_bdi);
|
||||
bdi_put(mtd_bdi);
|
||||
idr_destroy(&mtd_idr);
|
||||
}
|
||||
|
||||
@@ -476,6 +476,8 @@ void generic_shutdown_super(struct super_block *sb)
|
||||
spin_unlock(&sb_lock);
|
||||
up_write(&sb->s_umount);
|
||||
if (sb->s_bdi != &noop_backing_dev_info) {
|
||||
if (sb->s_iflags & SB_I_PERSB_BDI)
|
||||
bdi_unregister(sb->s_bdi);
|
||||
bdi_put(sb->s_bdi);
|
||||
sb->s_bdi = &noop_backing_dev_info;
|
||||
}
|
||||
@@ -1562,6 +1564,7 @@ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
|
||||
}
|
||||
WARN_ON(sb->s_bdi != &noop_backing_dev_info);
|
||||
sb->s_bdi = bdi;
|
||||
sb->s_iflags |= SB_I_PERSB_BDI;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -103,6 +103,9 @@ struct wb_completion {
|
||||
* change as blkcg is disabled and enabled higher up in the hierarchy, a wb
|
||||
* is tested for blkcg after lookup and removed from index on mismatch so
|
||||
* that a new wb for the combination can be created.
|
||||
*
|
||||
* Each bdi_writeback that is not embedded into the backing_dev_info must hold
|
||||
* a reference to the parent backing_dev_info. See cgwb_create() for details.
|
||||
*/
|
||||
struct bdi_writeback {
|
||||
struct backing_dev_info *bdi; /* our parent bdi */
|
||||
|
||||
@@ -1440,6 +1440,7 @@ extern int send_sigurg(struct fown_struct *fown);
|
||||
#define SB_I_UNTRUSTED_MOUNTER 0x00000040
|
||||
|
||||
#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
|
||||
#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
|
||||
|
||||
/* Possible states of 'frozen' field */
|
||||
enum {
|
||||
|
||||
@@ -609,9 +609,9 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
|
||||
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
|
||||
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
|
||||
|
||||
void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
|
||||
void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1);
|
||||
void free_pages_exact(void *virt, size_t size);
|
||||
void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
|
||||
__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(1);
|
||||
|
||||
#define __get_free_page(gfp_mask) \
|
||||
__get_free_pages((gfp_mask), 0)
|
||||
|
||||
@@ -132,13 +132,7 @@ io_mapping_init_wc(struct io_mapping *iomap,
|
||||
|
||||
iomap->base = base;
|
||||
iomap->size = size;
|
||||
#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */
|
||||
iomap->prot = pgprot_noncached_wc(PAGE_KERNEL);
|
||||
#elif defined(pgprot_writecombine)
|
||||
iomap->prot = pgprot_writecombine(PAGE_KERNEL);
|
||||
#else
|
||||
iomap->prot = pgprot_noncached(PAGE_KERNEL);
|
||||
#endif
|
||||
|
||||
return iomap;
|
||||
}
|
||||
|
||||
@@ -180,12 +180,6 @@ struct mem_cgroup_thresholds {
|
||||
struct mem_cgroup_threshold_ary *spare;
|
||||
};
|
||||
|
||||
enum memcg_kmem_state {
|
||||
KMEM_NONE,
|
||||
KMEM_ALLOCATED,
|
||||
KMEM_ONLINE,
|
||||
};
|
||||
|
||||
#if defined(CONFIG_SMP)
|
||||
struct memcg_padding {
|
||||
char x[0];
|
||||
@@ -318,7 +312,6 @@ struct mem_cgroup {
|
||||
|
||||
#ifdef CONFIG_MEMCG_KMEM
|
||||
int kmemcg_id;
|
||||
enum memcg_kmem_state kmem_state;
|
||||
struct obj_cgroup __rcu *objcg;
|
||||
struct list_head objcg_list; /* list of inherited objcgs */
|
||||
#endif
|
||||
|
||||
@@ -140,7 +140,6 @@ typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
|
||||
extern int walk_memory_blocks(unsigned long start, unsigned long size,
|
||||
void *arg, walk_memory_blocks_func_t func);
|
||||
extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
|
||||
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
|
||||
|
||||
extern int memory_group_register_static(int nid, unsigned long max_pages);
|
||||
extern int memory_group_register_dynamic(int nid, unsigned long unit_pages);
|
||||
|
||||
@@ -1827,12 +1827,24 @@ extern void user_shm_unlock(size_t, struct ucounts *);
|
||||
* Parameter block passed down to zap_pte_range in exceptional cases.
|
||||
*/
|
||||
struct zap_details {
|
||||
struct address_space *check_mapping; /* Check page->mapping if set */
|
||||
pgoff_t first_index; /* Lowest page->index to unmap */
|
||||
pgoff_t last_index; /* Highest page->index to unmap */
|
||||
struct address_space *zap_mapping; /* Check page->mapping if set */
|
||||
struct page *single_page; /* Locked page to be unmapped */
|
||||
};
|
||||
|
||||
/*
|
||||
* We set details->zap_mapping when we want to unmap shared but keep private
|
||||
* pages. Return true if zapping of this page should be skipped, false otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
zap_skip_check_mapping(struct zap_details *details, struct page *page)
|
||||
{
|
||||
if (!details || !page)
|
||||
return false;
|
||||
|
||||
return details->zap_mapping &&
|
||||
(details->zap_mapping != page_rmapping(page));
|
||||
}
|
||||
|
||||
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
|
||||
pte_t pte);
|
||||
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
|
||||
|
||||
@@ -1479,7 +1479,7 @@ static inline int pfn_valid(unsigned long pfn)
|
||||
|
||||
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
|
||||
return 0;
|
||||
ms = __nr_to_section(pfn_to_section_nr(pfn));
|
||||
ms = __pfn_to_section(pfn);
|
||||
if (!valid_section(ms))
|
||||
return 0;
|
||||
/*
|
||||
@@ -1494,7 +1494,7 @@ static inline int pfn_in_present_section(unsigned long pfn)
|
||||
{
|
||||
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
|
||||
return 0;
|
||||
return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
|
||||
return present_section(__pfn_to_section(pfn));
|
||||
}
|
||||
|
||||
static inline unsigned long next_present_section_nr(unsigned long section_nr)
|
||||
|
||||
@@ -123,7 +123,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
|
||||
pcpu_fc_populate_pte_fn_t populate_pte_fn);
|
||||
#endif
|
||||
|
||||
extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
|
||||
extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
|
||||
extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr);
|
||||
extern bool is_kernel_percpu_address(unsigned long addr);
|
||||
|
||||
@@ -131,8 +131,8 @@ extern bool is_kernel_percpu_address(unsigned long addr);
|
||||
extern void __init setup_per_cpu_areas(void);
|
||||
#endif
|
||||
|
||||
extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp);
|
||||
extern void __percpu *__alloc_percpu(size_t size, size_t align);
|
||||
extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1);
|
||||
extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1);
|
||||
extern void free_percpu(void __percpu *__pdata);
|
||||
extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
|
||||
|
||||
|
||||
@@ -181,7 +181,7 @@ int kmem_cache_shrink(struct kmem_cache *s);
|
||||
/*
|
||||
* Common kmalloc functions provided by all allocators
|
||||
*/
|
||||
void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags);
|
||||
void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2);
|
||||
void kfree(const void *objp);
|
||||
void kfree_sensitive(const void *objp);
|
||||
size_t __ksize(const void *objp);
|
||||
@@ -425,7 +425,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
|
||||
#define kmalloc_index(s) __kmalloc_index(s, true)
|
||||
#endif /* !CONFIG_SLOB */
|
||||
|
||||
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc;
|
||||
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
|
||||
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc;
|
||||
void kmem_cache_free(struct kmem_cache *s, void *objp);
|
||||
|
||||
@@ -449,11 +449,12 @@ static __always_inline void kfree_bulk(size_t size, void **p)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc;
|
||||
void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
|
||||
__alloc_size(1);
|
||||
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
|
||||
__malloc;
|
||||
#else
|
||||
static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
return __kmalloc(size, flags);
|
||||
}
|
||||
@@ -466,23 +467,23 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
|
||||
__assume_slab_alignment __malloc;
|
||||
__assume_slab_alignment __alloc_size(3);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t size) __assume_slab_alignment __malloc;
|
||||
int node, size_t size) __assume_slab_alignment
|
||||
__alloc_size(4);
|
||||
#else
|
||||
static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
|
||||
gfp_t gfpflags, int node,
|
||||
size_t size)
|
||||
static __always_inline __alloc_size(4) void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
|
||||
gfp_t gfpflags, int node, size_t size)
|
||||
{
|
||||
return kmem_cache_alloc_trace(s, gfpflags, size);
|
||||
}
|
||||
#endif /* CONFIG_NUMA */
|
||||
|
||||
#else /* CONFIG_TRACING */
|
||||
static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags,
|
||||
size_t size)
|
||||
static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s,
|
||||
gfp_t flags, size_t size)
|
||||
{
|
||||
void *ret = kmem_cache_alloc(s, flags);
|
||||
|
||||
@@ -501,19 +502,20 @@ static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, g
|
||||
#endif /* CONFIG_TRACING */
|
||||
|
||||
extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment
|
||||
__malloc;
|
||||
__alloc_size(1);
|
||||
|
||||
#ifdef CONFIG_TRACING
|
||||
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
|
||||
__assume_page_alignment __malloc;
|
||||
__assume_page_alignment __alloc_size(1);
|
||||
#else
|
||||
static __always_inline void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
|
||||
static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags,
|
||||
unsigned int order)
|
||||
{
|
||||
return kmalloc_order(size, flags, order);
|
||||
}
|
||||
#endif
|
||||
|
||||
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
|
||||
static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags)
|
||||
{
|
||||
unsigned int order = get_order(size);
|
||||
return kmalloc_order_trace(size, flags, order);
|
||||
@@ -573,7 +575,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
|
||||
* Try really hard to succeed the allocation but fail
|
||||
* eventually.
|
||||
*/
|
||||
static __always_inline void *kmalloc(size_t size, gfp_t flags)
|
||||
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
|
||||
{
|
||||
if (__builtin_constant_p(size)) {
|
||||
#ifndef CONFIG_SLOB
|
||||
@@ -595,7 +597,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
|
||||
return __kmalloc(size, flags);
|
||||
}
|
||||
|
||||
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
#ifndef CONFIG_SLOB
|
||||
if (__builtin_constant_p(size) &&
|
||||
@@ -619,7 +621,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
* @size: element size.
|
||||
* @flags: the type of memory to allocate (see kmalloc).
|
||||
*/
|
||||
static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
|
||||
static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags)
|
||||
{
|
||||
size_t bytes;
|
||||
|
||||
@@ -637,8 +639,10 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
|
||||
* @new_size: new size of a single member of the array
|
||||
* @flags: the type of memory to allocate (see kmalloc)
|
||||
*/
|
||||
static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t new_size,
|
||||
gfp_t flags)
|
||||
static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p,
|
||||
size_t new_n,
|
||||
size_t new_size,
|
||||
gfp_t flags)
|
||||
{
|
||||
size_t bytes;
|
||||
|
||||
@@ -654,7 +658,7 @@ static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t n
|
||||
* @size: element size.
|
||||
* @flags: the type of memory to allocate (see kmalloc).
|
||||
*/
|
||||
static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
|
||||
static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags)
|
||||
{
|
||||
return kmalloc_array(n, size, flags | __GFP_ZERO);
|
||||
}
|
||||
@@ -667,12 +671,13 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
|
||||
* allocator where we care about the real place the memory allocation
|
||||
* request comes from.
|
||||
*/
|
||||
extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller);
|
||||
extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
|
||||
__alloc_size(1);
|
||||
#define kmalloc_track_caller(size, flags) \
|
||||
__kmalloc_track_caller(size, flags, _RET_IP_)
|
||||
|
||||
static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
|
||||
int node)
|
||||
static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
|
||||
int node)
|
||||
{
|
||||
size_t bytes;
|
||||
|
||||
@@ -683,7 +688,7 @@ static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
|
||||
return __kmalloc_node(bytes, flags, node);
|
||||
}
|
||||
|
||||
static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
|
||||
static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
|
||||
{
|
||||
return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
|
||||
}
|
||||
@@ -691,7 +696,7 @@ static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
|
||||
unsigned long caller);
|
||||
unsigned long caller) __alloc_size(1);
|
||||
#define kmalloc_node_track_caller(size, flags, node) \
|
||||
__kmalloc_node_track_caller(size, flags, node, \
|
||||
_RET_IP_)
|
||||
@@ -716,7 +721,7 @@ static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
|
||||
* @size: how many bytes of memory are required.
|
||||
* @flags: the type of memory to allocate (see kmalloc).
|
||||
*/
|
||||
static inline void *kzalloc(size_t size, gfp_t flags)
|
||||
static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags)
|
||||
{
|
||||
return kmalloc(size, flags | __GFP_ZERO);
|
||||
}
|
||||
@@ -727,26 +732,26 @@ static inline void *kzalloc(size_t size, gfp_t flags)
|
||||
* @flags: the type of memory to allocate (see kmalloc).
|
||||
* @node: memory node from which to allocate
|
||||
*/
|
||||
static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
|
||||
static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
return kmalloc_node(size, flags | __GFP_ZERO, node);
|
||||
}
|
||||
|
||||
extern void *kvmalloc_node(size_t size, gfp_t flags, int node);
|
||||
static inline void *kvmalloc(size_t size, gfp_t flags)
|
||||
extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1);
|
||||
static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags)
|
||||
{
|
||||
return kvmalloc_node(size, flags, NUMA_NO_NODE);
|
||||
}
|
||||
static inline void *kvzalloc_node(size_t size, gfp_t flags, int node)
|
||||
static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
return kvmalloc_node(size, flags | __GFP_ZERO, node);
|
||||
}
|
||||
static inline void *kvzalloc(size_t size, gfp_t flags)
|
||||
static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags)
|
||||
{
|
||||
return kvmalloc(size, flags | __GFP_ZERO);
|
||||
}
|
||||
|
||||
static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
|
||||
static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
|
||||
{
|
||||
size_t bytes;
|
||||
|
||||
@@ -756,13 +761,13 @@ static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
|
||||
return kvmalloc(bytes, flags);
|
||||
}
|
||||
|
||||
static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
|
||||
static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags)
|
||||
{
|
||||
return kvmalloc_array(n, size, flags | __GFP_ZERO);
|
||||
}
|
||||
|
||||
extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize,
|
||||
gfp_t flags);
|
||||
extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
|
||||
__alloc_size(3);
|
||||
extern void kvfree(const void *addr);
|
||||
extern void kvfree_sensitive(const void *addr, size_t len);
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ struct notifier_block; /* in notifier.h */
|
||||
#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */
|
||||
#define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */
|
||||
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
|
||||
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
|
||||
#define VM_NO_GUARD 0x00000040 /* ***DANGEROUS*** don't add guard page */
|
||||
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
|
||||
#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */
|
||||
#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
|
||||
@@ -136,21 +136,21 @@ static inline void vmalloc_init(void)
|
||||
static inline unsigned long vmalloc_nr_pages(void) { return 0; }
|
||||
#endif
|
||||
|
||||
extern void *vmalloc(unsigned long size);
|
||||
extern void *vzalloc(unsigned long size);
|
||||
extern void *vmalloc_user(unsigned long size);
|
||||
extern void *vmalloc_node(unsigned long size, int node);
|
||||
extern void *vzalloc_node(unsigned long size, int node);
|
||||
extern void *vmalloc_32(unsigned long size);
|
||||
extern void *vmalloc_32_user(unsigned long size);
|
||||
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask);
|
||||
extern void *vmalloc(unsigned long size) __alloc_size(1);
|
||||
extern void *vzalloc(unsigned long size) __alloc_size(1);
|
||||
extern void *vmalloc_user(unsigned long size) __alloc_size(1);
|
||||
extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1);
|
||||
extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1);
|
||||
extern void *vmalloc_32(unsigned long size) __alloc_size(1);
|
||||
extern void *vmalloc_32_user(unsigned long size) __alloc_size(1);
|
||||
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
|
||||
extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
|
||||
unsigned long start, unsigned long end, gfp_t gfp_mask,
|
||||
pgprot_t prot, unsigned long vm_flags, int node,
|
||||
const void *caller);
|
||||
const void *caller) __alloc_size(1);
|
||||
void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
|
||||
int node, const void *caller);
|
||||
void *vmalloc_no_huge(unsigned long size);
|
||||
int node, const void *caller) __alloc_size(1);
|
||||
void *vmalloc_no_huge(unsigned long size) __alloc_size(1);
|
||||
|
||||
extern void vfree(const void *addr);
|
||||
extern void vfree_atomic(const void *addr);
|
||||
|
||||
@@ -13,7 +13,7 @@ struct mm_struct;
|
||||
extern int trace_mmap_lock_reg(void);
|
||||
extern void trace_mmap_lock_unreg(void);
|
||||
|
||||
TRACE_EVENT_FN(mmap_lock_start_locking,
|
||||
DECLARE_EVENT_CLASS(mmap_lock,
|
||||
|
||||
TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),
|
||||
|
||||
@@ -32,15 +32,23 @@ TRACE_EVENT_FN(mmap_lock_start_locking,
|
||||
),
|
||||
|
||||
TP_printk(
|
||||
"mm=%p memcg_path=%s write=%s\n",
|
||||
"mm=%p memcg_path=%s write=%s",
|
||||
__entry->mm,
|
||||
__get_str(memcg_path),
|
||||
__entry->write ? "true" : "false"
|
||||
),
|
||||
|
||||
trace_mmap_lock_reg, trace_mmap_lock_unreg
|
||||
)
|
||||
);
|
||||
|
||||
#define DEFINE_MMAP_LOCK_EVENT(name) \
|
||||
DEFINE_EVENT_FN(mmap_lock, name, \
|
||||
TP_PROTO(struct mm_struct *mm, const char *memcg_path, \
|
||||
bool write), \
|
||||
TP_ARGS(mm, memcg_path, write), \
|
||||
trace_mmap_lock_reg, trace_mmap_lock_unreg)
|
||||
|
||||
DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking);
|
||||
DEFINE_MMAP_LOCK_EVENT(mmap_lock_released);
|
||||
|
||||
TRACE_EVENT_FN(mmap_lock_acquire_returned,
|
||||
|
||||
TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write,
|
||||
@@ -63,7 +71,7 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned,
|
||||
),
|
||||
|
||||
TP_printk(
|
||||
"mm=%p memcg_path=%s write=%s success=%s\n",
|
||||
"mm=%p memcg_path=%s write=%s success=%s",
|
||||
__entry->mm,
|
||||
__get_str(memcg_path),
|
||||
__entry->write ? "true" : "false",
|
||||
@@ -73,34 +81,6 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned,
|
||||
trace_mmap_lock_reg, trace_mmap_lock_unreg
|
||||
);
|
||||
|
||||
TRACE_EVENT_FN(mmap_lock_released,
|
||||
|
||||
TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),
|
||||
|
||||
TP_ARGS(mm, memcg_path, write),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(struct mm_struct *, mm)
|
||||
__string(memcg_path, memcg_path)
|
||||
__field(bool, write)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->mm = mm;
|
||||
__assign_str(memcg_path, memcg_path);
|
||||
__entry->write = write;
|
||||
),
|
||||
|
||||
TP_printk(
|
||||
"mm=%p memcg_path=%s write=%s\n",
|
||||
__entry->mm,
|
||||
__get_str(memcg_path),
|
||||
__entry->write ? "true" : "false"
|
||||
),
|
||||
|
||||
trace_mmap_lock_reg, trace_mmap_lock_unreg
|
||||
);
|
||||
|
||||
#endif /* _TRACE_MMAP_LOCK_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
||||
@@ -137,7 +137,7 @@ static void __acct_update_integrals(struct task_struct *tsk,
|
||||
* the rest of the math is done in xacct_add_tsk.
|
||||
*/
|
||||
tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10;
|
||||
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10;
|
||||
tsk->acct_vm_mem1 += delta * READ_ONCE(tsk->mm->total_vm) >> 10;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -292,8 +292,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
|
||||
|
||||
memset(wb, 0, sizeof(*wb));
|
||||
|
||||
if (wb != &bdi->wb)
|
||||
bdi_get(bdi);
|
||||
wb->bdi = bdi;
|
||||
wb->last_old_flush = jiffies;
|
||||
INIT_LIST_HEAD(&wb->b_dirty);
|
||||
@@ -317,7 +315,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
|
||||
|
||||
err = fprop_local_init_percpu(&wb->completions, gfp);
|
||||
if (err)
|
||||
goto out_put_bdi;
|
||||
return err;
|
||||
|
||||
for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
|
||||
err = percpu_counter_init(&wb->stat[i], 0, gfp);
|
||||
@@ -331,9 +329,6 @@ out_destroy_stat:
|
||||
while (i--)
|
||||
percpu_counter_destroy(&wb->stat[i]);
|
||||
fprop_local_destroy_percpu(&wb->completions);
|
||||
out_put_bdi:
|
||||
if (wb != &bdi->wb)
|
||||
bdi_put(bdi);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -374,8 +369,6 @@ static void wb_exit(struct bdi_writeback *wb)
|
||||
percpu_counter_destroy(&wb->stat[i]);
|
||||
|
||||
fprop_local_destroy_percpu(&wb->completions);
|
||||
if (wb != &wb->bdi->wb)
|
||||
bdi_put(wb->bdi);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
@@ -398,6 +391,7 @@ static void cgwb_release_workfn(struct work_struct *work)
|
||||
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
|
||||
release_work);
|
||||
struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
|
||||
struct backing_dev_info *bdi = wb->bdi;
|
||||
|
||||
mutex_lock(&wb->bdi->cgwb_release_mutex);
|
||||
wb_shutdown(wb);
|
||||
@@ -417,6 +411,7 @@ static void cgwb_release_workfn(struct work_struct *work)
|
||||
|
||||
percpu_ref_exit(&wb->refcnt);
|
||||
wb_exit(wb);
|
||||
bdi_put(bdi);
|
||||
WARN_ON_ONCE(!list_empty(&wb->b_attached));
|
||||
kfree_rcu(wb, rcu);
|
||||
}
|
||||
@@ -498,6 +493,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
|
||||
INIT_LIST_HEAD(&wb->b_attached);
|
||||
INIT_WORK(&wb->release_work, cgwb_release_workfn);
|
||||
set_bit(WB_registered, &wb->state);
|
||||
bdi_get(bdi);
|
||||
|
||||
/*
|
||||
* The root wb determines the registered state of the whole bdi and
|
||||
@@ -529,6 +525,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
|
||||
goto out_put;
|
||||
|
||||
err_fprop_exit:
|
||||
bdi_put(bdi);
|
||||
fprop_local_destroy_percpu(&wb->memcg_completions);
|
||||
err_ref_exit:
|
||||
percpu_ref_exit(&wb->refcnt);
|
||||
@@ -959,14 +956,14 @@ void bdi_unregister(struct backing_dev_info *bdi)
|
||||
bdi->owner = NULL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(bdi_unregister);
|
||||
|
||||
static void release_bdi(struct kref *ref)
|
||||
{
|
||||
struct backing_dev_info *bdi =
|
||||
container_of(ref, struct backing_dev_info, refcnt);
|
||||
|
||||
if (test_bit(WB_registered, &bdi->wb.state))
|
||||
bdi_unregister(bdi);
|
||||
WARN_ON_ONCE(test_bit(WB_registered, &bdi->wb.state));
|
||||
WARN_ON_ONCE(bdi->dev);
|
||||
wb_exit(&bdi->wb);
|
||||
kfree(bdi);
|
||||
|
||||
65
mm/filemap.c
65
mm/filemap.c
@@ -638,6 +638,30 @@ static bool mapping_needs_writeback(struct address_space *mapping)
|
||||
return mapping->nrpages;
|
||||
}
|
||||
|
||||
/*
 * Scan mapping->i_pages for the page indices covering
 * [start_byte, end_byte] and report whether any page found there is
 * dirty, locked or under writeback.
 *
 * An inverted range (end_byte < start_byte) yields false without scanning.
 * The walk is done under rcu_read_lock(); retry entries and value entries
 * are stepped over.
 */
static bool filemap_range_has_writeback(struct address_space *mapping,
					loff_t start_byte, loff_t end_byte)
{
	XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
	pgoff_t last_index = end_byte >> PAGE_SHIFT;
	struct page *page;
	bool found = false;

	if (end_byte < start_byte)
		return false;

	rcu_read_lock();
	xas_for_each(&xas, page, last_index) {
		/* xas_retry() must be evaluated first, as in a plain retry loop. */
		if (xas_retry(&xas, page) || xa_is_value(page))
			continue;
		if (PageDirty(page) || PageLocked(page) || PageWriteback(page)) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
|
||||
|
||||
/**
|
||||
* filemap_range_needs_writeback - check if range potentially needs writeback
|
||||
* @mapping: address space within which to check
|
||||
@@ -655,29 +679,12 @@ static bool mapping_needs_writeback(struct address_space *mapping)
|
||||
bool filemap_range_needs_writeback(struct address_space *mapping,
|
||||
loff_t start_byte, loff_t end_byte)
|
||||
{
|
||||
XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
|
||||
pgoff_t max = end_byte >> PAGE_SHIFT;
|
||||
struct page *page;
|
||||
|
||||
if (!mapping_needs_writeback(mapping))
|
||||
return false;
|
||||
if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
|
||||
!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
|
||||
return false;
|
||||
if (end_byte < start_byte)
|
||||
return false;
|
||||
|
||||
rcu_read_lock();
|
||||
xas_for_each(&xas, page, max) {
|
||||
if (xas_retry(&xas, page))
|
||||
continue;
|
||||
if (xa_is_value(page))
|
||||
continue;
|
||||
if (PageDirty(page) || PageLocked(page) || PageWriteback(page))
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return page != NULL;
|
||||
return filemap_range_has_writeback(mapping, start_byte, end_byte);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
|
||||
|
||||
@@ -2088,7 +2095,6 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
|
||||
if (!xa_is_value(page)) {
|
||||
if (page->index < start)
|
||||
goto put;
|
||||
VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
|
||||
if (page->index + thp_nr_pages(page) - 1 > end)
|
||||
goto put;
|
||||
if (!trylock_page(page))
|
||||
@@ -2621,6 +2627,9 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
if ((iocb->ki_flags & IOCB_WAITQ) && already_read)
|
||||
iocb->ki_flags |= IOCB_NOWAIT;
|
||||
|
||||
if (unlikely(iocb->ki_pos >= i_size_read(inode)))
|
||||
break;
|
||||
|
||||
error = filemap_get_pages(iocb, iter, &pvec);
|
||||
if (error < 0)
|
||||
break;
|
||||
@@ -2733,9 +2742,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
loff_t size;
|
||||
|
||||
size = i_size_read(inode);
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
|
||||
iocb->ki_pos + count - 1))
|
||||
@@ -2767,8 +2774,9 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
* the rest of the read. Buffered reads will not work for
|
||||
* DAX files, so don't bother trying.
|
||||
*/
|
||||
if (retval < 0 || !count || iocb->ki_pos >= size ||
|
||||
IS_DAX(inode))
|
||||
if (retval < 0 || !count || IS_DAX(inode))
|
||||
return retval;
|
||||
if (iocb->ki_pos >= i_size_read(inode))
|
||||
return retval;
|
||||
}
|
||||
|
||||
@@ -3201,15 +3209,8 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
|
||||
}
|
||||
}
|
||||
|
||||
if (pmd_none(*vmf->pmd)) {
|
||||
vmf->ptl = pmd_lock(mm, vmf->pmd);
|
||||
if (likely(pmd_none(*vmf->pmd))) {
|
||||
mm_inc_nr_ptes(mm);
|
||||
pmd_populate(mm, vmf->pmd, vmf->prealloc_pte);
|
||||
vmf->prealloc_pte = NULL;
|
||||
}
|
||||
spin_unlock(vmf->ptl);
|
||||
}
|
||||
if (pmd_none(*vmf->pmd))
|
||||
pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);
|
||||
|
||||
/* See comment in handle_pte_fault() */
|
||||
if (pmd_devmap_trans_unstable(vmf->pmd)) {
|
||||
|
||||
5
mm/gup.c
5
mm/gup.c
@@ -2365,7 +2365,6 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
|
||||
{
|
||||
int nr_start = *nr;
|
||||
struct dev_pagemap *pgmap = NULL;
|
||||
int ret = 1;
|
||||
|
||||
do {
|
||||
struct page *page = pfn_to_page(pfn);
|
||||
@@ -2373,14 +2372,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
|
||||
pgmap = get_dev_pagemap(pfn, pgmap);
|
||||
if (unlikely(!pgmap)) {
|
||||
undo_dev_pagemap(nr, nr_start, flags, pages);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
SetPageReferenced(page);
|
||||
pages[*nr] = page;
|
||||
if (unlikely(!try_grab_page(page, flags))) {
|
||||
undo_dev_pagemap(nr, nr_start, flags, pages);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
(*nr)++;
|
||||
@@ -2388,7 +2385,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
put_dev_pagemap(pgmap);
|
||||
return ret;
|
||||
return addr == end;
|
||||
}
|
||||
|
||||
static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
|
||||
|
||||
@@ -47,6 +47,7 @@ bool __folio_end_writeback(struct folio *folio);
|
||||
|
||||
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
|
||||
unsigned long floor, unsigned long ceiling);
|
||||
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
|
||||
|
||||
static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
|
||||
{
|
||||
|
||||
@@ -15,18 +15,29 @@
|
||||
#include "slab.h"
|
||||
|
||||
#ifdef CONFIG_MEMCG_KMEM
|
||||
static LIST_HEAD(list_lrus);
|
||||
static LIST_HEAD(memcg_list_lrus);
|
||||
static DEFINE_MUTEX(list_lrus_mutex);
|
||||
|
||||
/* Report @lru's memcg_aware flag. */
static inline bool list_lru_memcg_aware(struct list_lru *lru)
|
||||
{
|
||||
return lru->memcg_aware;
|
||||
}
|
||||
|
||||
static void list_lru_register(struct list_lru *lru)
|
||||
{
|
||||
if (!list_lru_memcg_aware(lru))
|
||||
return;
|
||||
|
||||
mutex_lock(&list_lrus_mutex);
|
||||
list_add(&lru->list, &list_lrus);
|
||||
list_add(&lru->list, &memcg_list_lrus);
|
||||
mutex_unlock(&list_lrus_mutex);
|
||||
}
|
||||
|
||||
static void list_lru_unregister(struct list_lru *lru)
|
||||
{
|
||||
if (!list_lru_memcg_aware(lru))
|
||||
return;
|
||||
|
||||
mutex_lock(&list_lrus_mutex);
|
||||
list_del(&lru->list);
|
||||
mutex_unlock(&list_lrus_mutex);
|
||||
@@ -37,11 +48,6 @@ static int lru_shrinker_id(struct list_lru *lru)
|
||||
return lru->shrinker_id;
|
||||
}
|
||||
|
||||
/* Report @lru's memcg_aware flag. */
static inline bool list_lru_memcg_aware(struct list_lru *lru)
|
||||
{
|
||||
return lru->memcg_aware;
|
||||
}
|
||||
|
||||
static inline struct list_lru_one *
|
||||
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
|
||||
{
|
||||
@@ -176,13 +182,16 @@ unsigned long list_lru_count_one(struct list_lru *lru,
|
||||
{
|
||||
struct list_lru_node *nlru = &lru->node[nid];
|
||||
struct list_lru_one *l;
|
||||
unsigned long count;
|
||||
long count;
|
||||
|
||||
rcu_read_lock();
|
||||
l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
|
||||
count = READ_ONCE(l->nr_items);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (unlikely(count < 0))
|
||||
count = 0;
|
||||
|
||||
return count;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(list_lru_count_one);
|
||||
@@ -354,8 +363,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
|
||||
struct list_lru_memcg *memcg_lrus;
|
||||
int size = memcg_nr_cache_ids;
|
||||
|
||||
memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
|
||||
size * sizeof(void *), GFP_KERNEL);
|
||||
memcg_lrus = kvmalloc(struct_size(memcg_lrus, lru, size), GFP_KERNEL);
|
||||
if (!memcg_lrus)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -389,7 +397,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
|
||||
|
||||
old = rcu_dereference_protected(nlru->memcg_lrus,
|
||||
lockdep_is_held(&list_lrus_mutex));
|
||||
new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
|
||||
new = kvmalloc(struct_size(new, lru, new_size), GFP_KERNEL);
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -398,19 +406,8 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
memcpy(&new->lru, &old->lru, old_size * sizeof(void *));
|
||||
|
||||
/*
|
||||
* The locking below allows readers that hold nlru->lock to avoid taking
|
||||
* rcu_read_lock (see list_lru_from_memcg_idx).
|
||||
*
|
||||
* Since list_lru_{add,del} may be called under an IRQ-safe lock,
|
||||
* we have to use IRQ-safe primitives here to avoid deadlock.
|
||||
*/
|
||||
spin_lock_irq(&nlru->lock);
|
||||
memcpy(&new->lru, &old->lru, flex_array_size(new, lru, old_size));
|
||||
rcu_assign_pointer(nlru->memcg_lrus, new);
|
||||
spin_unlock_irq(&nlru->lock);
|
||||
|
||||
kvfree_rcu(old, rcu);
|
||||
return 0;
|
||||
}
|
||||
@@ -466,9 +463,6 @@ static int memcg_update_list_lru(struct list_lru *lru,
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!list_lru_memcg_aware(lru))
|
||||
return 0;
|
||||
|
||||
for_each_node(i) {
|
||||
if (memcg_update_list_lru_node(&lru->node[i],
|
||||
old_size, new_size))
|
||||
@@ -491,9 +485,6 @@ static void memcg_cancel_update_list_lru(struct list_lru *lru,
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!list_lru_memcg_aware(lru))
|
||||
return;
|
||||
|
||||
for_each_node(i)
|
||||
memcg_cancel_update_list_lru_node(&lru->node[i],
|
||||
old_size, new_size);
|
||||
@@ -506,7 +497,7 @@ int memcg_update_all_list_lrus(int new_size)
|
||||
int old_size = memcg_nr_cache_ids;
|
||||
|
||||
mutex_lock(&list_lrus_mutex);
|
||||
list_for_each_entry(lru, &list_lrus, list) {
|
||||
list_for_each_entry(lru, &memcg_list_lrus, list) {
|
||||
ret = memcg_update_list_lru(lru, old_size, new_size);
|
||||
if (ret)
|
||||
goto fail;
|
||||
@@ -515,7 +506,7 @@ out:
|
||||
mutex_unlock(&list_lrus_mutex);
|
||||
return ret;
|
||||
fail:
|
||||
list_for_each_entry_continue_reverse(lru, &list_lrus, list)
|
||||
list_for_each_entry_continue_reverse(lru, &memcg_list_lrus, list)
|
||||
memcg_cancel_update_list_lru(lru, old_size, new_size);
|
||||
goto out;
|
||||
}
|
||||
@@ -552,9 +543,6 @@ static void memcg_drain_list_lru(struct list_lru *lru,
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!list_lru_memcg_aware(lru))
|
||||
return;
|
||||
|
||||
for_each_node(i)
|
||||
memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
|
||||
}
|
||||
@@ -564,7 +552,7 @@ void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
|
||||
struct list_lru *lru;
|
||||
|
||||
mutex_lock(&list_lrus_mutex);
|
||||
list_for_each_entry(lru, &list_lrus, list)
|
||||
list_for_each_entry(lru, &memcg_list_lrus, list)
|
||||
memcg_drain_list_lru(lru, src_idx, dst_memcg);
|
||||
mutex_unlock(&list_lrus_mutex);
|
||||
}
|
||||
|
||||
203
mm/memcontrol.c
203
mm/memcontrol.c
@@ -103,11 +103,6 @@ static bool do_memsw_account(void)
|
||||
return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap;
|
||||
}
|
||||
|
||||
/* memcg and lruvec stats flushing */
|
||||
static void flush_memcg_stats_dwork(struct work_struct *w);
|
||||
static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
|
||||
static DEFINE_SPINLOCK(stats_flush_lock);
|
||||
|
||||
#define THRESHOLDS_EVENTS_TARGET 128
|
||||
#define SOFTLIMIT_EVENTS_TARGET 1024
|
||||
|
||||
@@ -239,7 +234,7 @@ enum res_type {
|
||||
iter != NULL; \
|
||||
iter = mem_cgroup_iter(NULL, iter, NULL))
|
||||
|
||||
static inline bool should_force_charge(void)
|
||||
static inline bool task_is_dying(void)
|
||||
{
|
||||
return tsk_is_oom_victim(current) || fatal_signal_pending(current) ||
|
||||
(current->flags & PF_EXITING);
|
||||
@@ -613,6 +608,58 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
|
||||
return mz;
|
||||
}
|
||||
|
||||
/*
|
||||
* memcg and lruvec stats flushing
|
||||
*
|
||||
* Many codepaths leading to stats update or read are performance sensitive and
|
||||
* adding stats flushing in such codepaths is not desirable. So, to optimize the
|
||||
* flushing the kernel does:
|
||||
*
|
||||
* 1) Periodically and asynchronously flush the stats every 2 seconds to not let
|
||||
* rstat update tree grow unbounded.
|
||||
*
|
||||
* 2) Flush the stats synchronously on reader side only when there are more than
|
||||
* (MEMCG_CHARGE_BATCH * nr_cpus) update events. This optimization lets
|
||||
* stats be out of sync by at most (MEMCG_CHARGE_BATCH * nr_cpus), but
|
||||
* only for 2 seconds due to (1).
|
||||
*/
|
||||
static void flush_memcg_stats_dwork(struct work_struct *w);
|
||||
static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
|
||||
static DEFINE_SPINLOCK(stats_flush_lock);
|
||||
static DEFINE_PER_CPU(unsigned int, stats_updates);
|
||||
static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
|
||||
|
||||
/*
 * Note that @memcg's stats changed on the current CPU.
 *
 * Forwards the notification to cgroup_rstat_updated() for the memcg's cgroup
 * and the current CPU, then bumps this CPU's stats_updates counter. Each time
 * that counter reaches a multiple of MEMCG_CHARGE_BATCH, the global
 * stats_flush_threshold is incremented by one.
 */
static inline void memcg_rstat_updated(struct mem_cgroup *memcg)
|
||||
{
|
||||
cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
|
||||
if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH))
|
||||
atomic_inc(&stats_flush_threshold);
|
||||
}
|
||||
|
||||
/*
 * Flush rstat for root_mem_cgroup's cgroup and reset stats_flush_threshold
 * to 0.
 *
 * The flush runs under stats_flush_lock, which is only tried (with IRQ state
 * saved): if the lock is already held, this returns without flushing.
 */
static void __mem_cgroup_flush_stats(void)
|
||||
{
|
||||
unsigned long flag;
|
||||
|
||||
if (!spin_trylock_irqsave(&stats_flush_lock, flag))
|
||||
return;
|
||||
|
||||
cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
|
||||
atomic_set(&stats_flush_threshold, 0);
|
||||
spin_unlock_irqrestore(&stats_flush_lock, flag);
|
||||
}
|
||||
|
||||
/*
 * Flush memcg stats, but only once stats_flush_threshold has grown beyond
 * num_online_cpus(); below that the call does nothing.
 */
void mem_cgroup_flush_stats(void)
|
||||
{
|
||||
if (atomic_read(&stats_flush_threshold) > num_online_cpus())
|
||||
__mem_cgroup_flush_stats();
|
||||
}
|
||||
|
||||
/*
 * Work handler for stats_flush_dwork: calls mem_cgroup_flush_stats() and then
 * re-queues stats_flush_dwork on system_unbound_wq with a delay of 2 * HZ.
 * @w is not used.
 */
static void flush_memcg_stats_dwork(struct work_struct *w)
|
||||
{
|
||||
mem_cgroup_flush_stats();
|
||||
queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
|
||||
}
|
||||
|
||||
/**
|
||||
* __mod_memcg_state - update cgroup memory statistics
|
||||
* @memcg: the memory cgroup
|
||||
@@ -625,7 +672,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
|
||||
return;
|
||||
|
||||
__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
|
||||
cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
|
||||
memcg_rstat_updated(memcg);
|
||||
}
|
||||
|
||||
/* idx can be of type enum memcg_stat_item or node_stat_item. */
|
||||
@@ -653,10 +700,12 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
|
||||
memcg = pn->memcg;
|
||||
|
||||
/* Update memcg */
|
||||
__mod_memcg_state(memcg, idx, val);
|
||||
__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
|
||||
|
||||
/* Update lruvec */
|
||||
__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
|
||||
|
||||
memcg_rstat_updated(memcg);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -758,7 +807,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
|
||||
return;
|
||||
|
||||
__this_cpu_add(memcg->vmstats_percpu->events[idx], count);
|
||||
cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
|
||||
memcg_rstat_updated(memcg);
|
||||
}
|
||||
|
||||
static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
|
||||
@@ -1415,7 +1464,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
|
||||
*
|
||||
* Current memory state:
|
||||
*/
|
||||
cgroup_rstat_flush(memcg->css.cgroup);
|
||||
mem_cgroup_flush_stats();
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
|
||||
u64 size;
|
||||
@@ -1576,7 +1625,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
* A few threads which were not waiting at mutex_lock_killable() can
|
||||
* fail to bail out. Therefore, check again after holding oom_lock.
|
||||
*/
|
||||
ret = should_force_charge() || out_of_memory(&oc);
|
||||
ret = task_is_dying() || out_of_memory(&oc);
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&oom_lock);
|
||||
@@ -2544,6 +2593,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
struct page_counter *counter;
|
||||
enum oom_status oom_status;
|
||||
unsigned long nr_reclaimed;
|
||||
bool passed_oom = false;
|
||||
bool may_swap = true;
|
||||
bool drained = false;
|
||||
unsigned long pflags;
|
||||
@@ -2578,15 +2628,6 @@ retry:
|
||||
if (gfp_mask & __GFP_ATOMIC)
|
||||
goto force;
|
||||
|
||||
/*
|
||||
* Unlike in global OOM situations, memcg is not in a physical
|
||||
* memory shortage. Allow dying and OOM-killed tasks to
|
||||
* bypass the last charges so that they can exit quickly and
|
||||
* free their memory.
|
||||
*/
|
||||
if (unlikely(should_force_charge()))
|
||||
goto force;
|
||||
|
||||
/*
|
||||
* Prevent unbounded recursion when reclaim operations need to
|
||||
* allocate memory. This might exceed the limits temporarily,
|
||||
@@ -2644,8 +2685,9 @@ retry:
|
||||
if (gfp_mask & __GFP_RETRY_MAYFAIL)
|
||||
goto nomem;
|
||||
|
||||
if (fatal_signal_pending(current))
|
||||
goto force;
|
||||
/* Avoid endless loop for tasks bypassed by the oom killer */
|
||||
if (passed_oom && task_is_dying())
|
||||
goto nomem;
|
||||
|
||||
/*
|
||||
* keep retrying as long as the memcg oom killer is able to make
|
||||
@@ -2654,14 +2696,10 @@ retry:
|
||||
*/
|
||||
oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask,
|
||||
get_order(nr_pages * PAGE_SIZE));
|
||||
switch (oom_status) {
|
||||
case OOM_SUCCESS:
|
||||
if (oom_status == OOM_SUCCESS) {
|
||||
passed_oom = true;
|
||||
nr_retries = MAX_RECLAIM_RETRIES;
|
||||
goto retry;
|
||||
case OOM_FAILED:
|
||||
goto force;
|
||||
default:
|
||||
goto nomem;
|
||||
}
|
||||
nomem:
|
||||
if (!(gfp_mask & __GFP_NOFAIL))
|
||||
@@ -2736,8 +2774,7 @@ static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
return try_charge_memcg(memcg, gfp_mask, nr_pages);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MMU)
|
||||
static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
|
||||
static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
|
||||
{
|
||||
if (mem_cgroup_is_root(memcg))
|
||||
return;
|
||||
@@ -2746,7 +2783,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
|
||||
if (do_memsw_account())
|
||||
page_counter_uncharge(&memcg->memsw, nr_pages);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
|
||||
{
|
||||
@@ -2965,7 +3001,6 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
|
||||
static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
|
||||
unsigned int nr_pages)
|
||||
{
|
||||
struct page_counter *counter;
|
||||
struct mem_cgroup *memcg;
|
||||
int ret;
|
||||
|
||||
@@ -2975,21 +3010,8 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
|
||||
!page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
|
||||
|
||||
/*
|
||||
* Enforce __GFP_NOFAIL allocation because callers are not
|
||||
* prepared to see failures and likely do not have any failure
|
||||
* handling code.
|
||||
*/
|
||||
if (gfp & __GFP_NOFAIL) {
|
||||
page_counter_charge(&memcg->kmem, nr_pages);
|
||||
goto out;
|
||||
}
|
||||
cancel_charge(memcg, nr_pages);
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
|
||||
page_counter_charge(&memcg->kmem, nr_pages);
|
||||
out:
|
||||
css_put(&memcg->css);
|
||||
|
||||
@@ -3534,8 +3556,7 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
|
||||
unsigned long val;
|
||||
|
||||
if (mem_cgroup_is_root(memcg)) {
|
||||
/* mem_cgroup_threshold() calls here from irqsafe context */
|
||||
cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
|
||||
mem_cgroup_flush_stats();
|
||||
val = memcg_page_state(memcg, NR_FILE_PAGES) +
|
||||
memcg_page_state(memcg, NR_ANON_MAPPED);
|
||||
if (swap)
|
||||
@@ -3610,7 +3631,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
|
||||
return 0;
|
||||
|
||||
BUG_ON(memcg->kmemcg_id >= 0);
|
||||
BUG_ON(memcg->kmem_state);
|
||||
|
||||
memcg_id = memcg_alloc_cache_id();
|
||||
if (memcg_id < 0)
|
||||
@@ -3627,22 +3647,18 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
|
||||
static_branch_enable(&memcg_kmem_enabled_key);
|
||||
|
||||
memcg->kmemcg_id = memcg_id;
|
||||
memcg->kmem_state = KMEM_ONLINE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void memcg_offline_kmem(struct mem_cgroup *memcg)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
struct mem_cgroup *parent, *child;
|
||||
struct mem_cgroup *parent;
|
||||
int kmemcg_id;
|
||||
|
||||
if (memcg->kmem_state != KMEM_ONLINE)
|
||||
if (memcg->kmemcg_id == -1)
|
||||
return;
|
||||
|
||||
memcg->kmem_state = KMEM_ALLOCATED;
|
||||
|
||||
parent = parent_mem_cgroup(memcg);
|
||||
if (!parent)
|
||||
parent = root_mem_cgroup;
|
||||
@@ -3653,31 +3669,15 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
|
||||
BUG_ON(kmemcg_id < 0);
|
||||
|
||||
/*
|
||||
* Change kmemcg_id of this cgroup and all its descendants to the
|
||||
* parent's id, and then move all entries from this cgroup's list_lrus
|
||||
* to ones of the parent. After we have finished, all list_lrus
|
||||
* corresponding to this cgroup are guaranteed to remain empty. The
|
||||
* ordering is imposed by list_lru_node->lock taken by
|
||||
* After we have finished memcg_reparent_objcgs(), all list_lrus
|
||||
* corresponding to this cgroup are guaranteed to remain empty.
|
||||
* The ordering is imposed by list_lru_node->lock taken by
|
||||
* memcg_drain_all_list_lrus().
|
||||
*/
|
||||
rcu_read_lock(); /* can be called from css_free w/o cgroup_mutex */
|
||||
css_for_each_descendant_pre(css, &memcg->css) {
|
||||
child = mem_cgroup_from_css(css);
|
||||
BUG_ON(child->kmemcg_id != kmemcg_id);
|
||||
child->kmemcg_id = parent->kmemcg_id;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
memcg_drain_all_list_lrus(kmemcg_id, parent);
|
||||
|
||||
memcg_free_cache_id(kmemcg_id);
|
||||
}
|
||||
|
||||
static void memcg_free_kmem(struct mem_cgroup *memcg)
|
||||
{
|
||||
/* css_alloc() failed, offlining didn't happen */
|
||||
if (unlikely(memcg->kmem_state == KMEM_ONLINE))
|
||||
memcg_offline_kmem(memcg);
|
||||
memcg->kmemcg_id = -1;
|
||||
}
|
||||
#else
|
||||
static int memcg_online_kmem(struct mem_cgroup *memcg)
|
||||
@@ -3687,22 +3687,8 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
|
||||
/*
 * Empty stub: does nothing with @memcg.
 * NOTE(review): appears to sit in the #else branch closed by
 * "#endif CONFIG_MEMCG_KMEM" below -- confirm against the full file.
 */
static void memcg_offline_kmem(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
/*
 * Empty stub: does nothing with @memcg.
 * NOTE(review): appears to sit in the #else branch closed by
 * "#endif CONFIG_MEMCG_KMEM" below -- confirm against the full file.
 */
static void memcg_free_kmem(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_MEMCG_KMEM */
|
||||
|
||||
/*
 * Set the maximum of @memcg's kmem page counter to @max while holding
 * memcg_max_mutex. Returns whatever page_counter_set_max() returns.
 */
static int memcg_update_kmem_max(struct mem_cgroup *memcg,
|
||||
unsigned long max)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&memcg_max_mutex);
|
||||
ret = page_counter_set_max(&memcg->kmem, max);
|
||||
mutex_unlock(&memcg_max_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max)
|
||||
{
|
||||
int ret;
|
||||
@@ -3768,10 +3754,8 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
|
||||
ret = mem_cgroup_resize_max(memcg, nr_pages, true);
|
||||
break;
|
||||
case _KMEM:
|
||||
pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
|
||||
"Please report your usecase to linux-mm@kvack.org if you "
|
||||
"depend on this functionality.\n");
|
||||
ret = memcg_update_kmem_max(memcg, nr_pages);
|
||||
/* kmem.limit_in_bytes is deprecated. */
|
||||
ret = -EOPNOTSUPP;
|
||||
break;
|
||||
case _TCP:
|
||||
ret = memcg_update_tcp_max(memcg, nr_pages);
|
||||
@@ -3916,7 +3900,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
|
||||
int nid;
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
|
||||
|
||||
cgroup_rstat_flush(memcg->css.cgroup);
|
||||
mem_cgroup_flush_stats();
|
||||
|
||||
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
|
||||
seq_printf(m, "%s=%lu", stat->name,
|
||||
@@ -3988,7 +3972,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
|
||||
|
||||
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
|
||||
|
||||
cgroup_rstat_flush(memcg->css.cgroup);
|
||||
mem_cgroup_flush_stats();
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
|
||||
unsigned long nr;
|
||||
@@ -4491,7 +4475,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
|
||||
struct mem_cgroup *parent;
|
||||
|
||||
cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
|
||||
mem_cgroup_flush_stats();
|
||||
|
||||
*pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
|
||||
*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
|
||||
@@ -5324,7 +5308,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
|
||||
cancel_work_sync(&memcg->high_work);
|
||||
mem_cgroup_remove_from_trees(memcg);
|
||||
free_shrinker_info(memcg);
|
||||
memcg_free_kmem(memcg);
|
||||
|
||||
/* Need to offline kmem if online_css() fails */
|
||||
memcg_offline_kmem(memcg);
|
||||
mem_cgroup_free(memcg);
|
||||
}
|
||||
|
||||
@@ -5357,21 +5343,6 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
|
||||
memcg_wb_domain_size_changed(memcg);
|
||||
}
|
||||
|
||||
/*
 * Flush rstat for root_mem_cgroup's cgroup under stats_flush_lock. The lock
 * is only tried: if it is already held, return without flushing.
 */
void mem_cgroup_flush_stats(void)
|
||||
{
|
||||
if (!spin_trylock(&stats_flush_lock))
|
||||
return;
|
||||
|
||||
cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
|
||||
spin_unlock(&stats_flush_lock);
|
||||
}
|
||||
|
||||
/*
 * Work handler for stats_flush_dwork: calls mem_cgroup_flush_stats() and then
 * re-queues stats_flush_dwork on system_unbound_wq with a delay of 2 * HZ.
 * @w is not used.
 */
static void flush_memcg_stats_dwork(struct work_struct *w)
|
||||
{
|
||||
mem_cgroup_flush_stats();
|
||||
queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
|
||||
}
|
||||
|
||||
static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
||||
@@ -5561,7 +5532,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
|
||||
#endif
|
||||
|
||||
static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t ptent, swp_entry_t *entry)
|
||||
unsigned long addr, pte_t ptent)
|
||||
{
|
||||
if (!vma->vm_file) /* anonymous vma */
|
||||
return NULL;
|
||||
@@ -5736,7 +5707,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
|
||||
else if (is_swap_pte(ptent))
|
||||
page = mc_handle_swap_pte(vma, ptent, &ent);
|
||||
else if (pte_none(ptent))
|
||||
page = mc_handle_file_pte(vma, addr, ptent, &ent);
|
||||
page = mc_handle_file_pte(vma, addr, ptent);
|
||||
|
||||
if (!page && !ent.val)
|
||||
return ret;
|
||||
@@ -6391,7 +6362,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
|
||||
int i;
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
|
||||
|
||||
cgroup_rstat_flush(memcg->css.cgroup);
|
||||
mem_cgroup_flush_stats();
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
|
||||
int nid;
|
||||
|
||||
162
mm/memory.c
162
mm/memory.c
@@ -435,35 +435,39 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * pmd_install - hook a preallocated page table into an empty pmd entry
 * @mm:  mm passed to pmd_lock()/pmd_populate(); its pte count is bumped
 *       when the table is installed
 * @pmd: pmd entry to populate
 * @pte: in/out pointer to the page table to install
 *
 * Takes the pmd lock and, if *@pmd is still none, populates it with *@pte
 * and sets *@pte to NULL. If the entry is no longer none, *@pte is left
 * unchanged, so the caller can tell that the table was not consumed.
 */
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
|
||||
{
|
||||
spinlock_t *ptl = pmd_lock(mm, pmd);
|
||||
|
||||
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
|
||||
mm_inc_nr_ptes(mm);
|
||||
/*
|
||||
* Ensure all pte setup (eg. pte page lock and page clearing) are
|
||||
* visible before the pte is made visible to other CPUs by being
|
||||
* put into page tables.
|
||||
*
|
||||
* The other side of the story is the pointer chasing in the page
|
||||
* table walking code (when walking the page table without locking;
|
||||
* ie. most of the time). Fortunately, these data accesses consist
|
||||
* of a chain of data-dependent loads, meaning most CPUs (alpha
|
||||
* being the notable exception) will already guarantee loads are
|
||||
* seen in-order. See the alpha page table accessors for the
|
||||
* smp_rmb() barriers in page table walking code.
|
||||
*/
|
||||
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
|
||||
pmd_populate(mm, pmd, *pte);
|
||||
*pte = NULL;
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
}
|
||||
|
||||
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
|
||||
{
|
||||
spinlock_t *ptl;
|
||||
pgtable_t new = pte_alloc_one(mm);
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Ensure all pte setup (eg. pte page lock and page clearing) are
|
||||
* visible before the pte is made visible to other CPUs by being
|
||||
* put into page tables.
|
||||
*
|
||||
* The other side of the story is the pointer chasing in the page
|
||||
* table walking code (when walking the page table without locking;
|
||||
* ie. most of the time). Fortunately, these data accesses consist
|
||||
* of a chain of data-dependent loads, meaning most CPUs (alpha
|
||||
* being the notable exception) will already guarantee loads are
|
||||
* seen in-order. See the alpha page table accessors for the
|
||||
* smp_rmb() barriers in page table walking code.
|
||||
*/
|
||||
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
|
||||
|
||||
ptl = pmd_lock(mm, pmd);
|
||||
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
|
||||
mm_inc_nr_ptes(mm);
|
||||
pmd_populate(mm, pmd, new);
|
||||
new = NULL;
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
pmd_install(mm, pmd, &new);
|
||||
if (new)
|
||||
pte_free(mm, new);
|
||||
return 0;
|
||||
@@ -475,10 +479,9 @@ int __pte_alloc_kernel(pmd_t *pmd)
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
smp_wmb(); /* See comment in __pte_alloc */
|
||||
|
||||
spin_lock(&init_mm.page_table_lock);
|
||||
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
|
||||
smp_wmb(); /* See comment in pmd_install() */
|
||||
pmd_populate_kernel(&init_mm, pmd, new);
|
||||
new = NULL;
|
||||
}
|
||||
@@ -1335,16 +1338,8 @@ again:
|
||||
struct page *page;
|
||||
|
||||
page = vm_normal_page(vma, addr, ptent);
|
||||
if (unlikely(details) && page) {
|
||||
/*
|
||||
* unmap_shared_mapping_pages() wants to
|
||||
* invalidate cache without truncating:
|
||||
* unmap shared but keep private pages.
|
||||
*/
|
||||
if (details->check_mapping &&
|
||||
details->check_mapping != page_rmapping(page))
|
||||
continue;
|
||||
}
|
||||
if (unlikely(zap_skip_check_mapping(details, page)))
|
||||
continue;
|
||||
ptent = ptep_get_and_clear_full(mm, addr, pte,
|
||||
tlb->fullmm);
|
||||
tlb_remove_tlb_entry(tlb, pte, addr);
|
||||
@@ -1377,17 +1372,8 @@ again:
|
||||
is_device_exclusive_entry(entry)) {
|
||||
struct page *page = pfn_swap_entry_to_page(entry);
|
||||
|
||||
if (unlikely(details && details->check_mapping)) {
|
||||
/*
|
||||
* unmap_shared_mapping_pages() wants to
|
||||
* invalidate cache without truncating:
|
||||
* unmap shared but keep private pages.
|
||||
*/
|
||||
if (details->check_mapping !=
|
||||
page_rmapping(page))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlikely(zap_skip_check_mapping(details, page)))
|
||||
continue;
|
||||
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
|
||||
rss[mm_counter(page)]--;
|
||||
|
||||
@@ -2726,19 +2712,19 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
|
||||
* proceeding (but do_wp_page is only called after already making such a check;
|
||||
* and do_anonymous_page can safely check later on).
|
||||
*/
|
||||
static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
|
||||
pte_t *page_table, pte_t orig_pte)
|
||||
static inline int pte_unmap_same(struct vm_fault *vmf)
|
||||
{
|
||||
int same = 1;
|
||||
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
|
||||
if (sizeof(pte_t) > sizeof(unsigned long)) {
|
||||
spinlock_t *ptl = pte_lockptr(mm, pmd);
|
||||
spinlock_t *ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
|
||||
spin_lock(ptl);
|
||||
same = pte_same(*page_table, orig_pte);
|
||||
same = pte_same(*vmf->pte, vmf->orig_pte);
|
||||
spin_unlock(ptl);
|
||||
}
|
||||
#endif
|
||||
pte_unmap(page_table);
|
||||
pte_unmap(vmf->pte);
|
||||
vmf->pte = NULL;
|
||||
return same;
|
||||
}
|
||||
|
||||
@@ -3323,20 +3309,20 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
|
||||
}
|
||||
|
||||
static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
|
||||
pgoff_t first_index,
|
||||
pgoff_t last_index,
|
||||
struct zap_details *details)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
pgoff_t vba, vea, zba, zea;
|
||||
|
||||
vma_interval_tree_foreach(vma, root,
|
||||
details->first_index, details->last_index) {
|
||||
|
||||
vma_interval_tree_foreach(vma, root, first_index, last_index) {
|
||||
vba = vma->vm_pgoff;
|
||||
vea = vba + vma_pages(vma) - 1;
|
||||
zba = details->first_index;
|
||||
zba = first_index;
|
||||
if (zba < vba)
|
||||
zba = vba;
|
||||
zea = details->last_index;
|
||||
zea = last_index;
|
||||
if (zea > vea)
|
||||
zea = vea;
|
||||
|
||||
@@ -3362,18 +3348,22 @@ void unmap_mapping_page(struct page *page)
|
||||
{
|
||||
struct address_space *mapping = page->mapping;
|
||||
struct zap_details details = { };
|
||||
pgoff_t first_index;
|
||||
pgoff_t last_index;
|
||||
|
||||
VM_BUG_ON(!PageLocked(page));
|
||||
VM_BUG_ON(PageTail(page));
|
||||
|
||||
details.check_mapping = mapping;
|
||||
details.first_index = page->index;
|
||||
details.last_index = page->index + thp_nr_pages(page) - 1;
|
||||
first_index = page->index;
|
||||
last_index = page->index + thp_nr_pages(page) - 1;
|
||||
|
||||
details.zap_mapping = mapping;
|
||||
details.single_page = page;
|
||||
|
||||
i_mmap_lock_write(mapping);
|
||||
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
|
||||
unmap_mapping_range_tree(&mapping->i_mmap, &details);
|
||||
unmap_mapping_range_tree(&mapping->i_mmap, first_index,
|
||||
last_index, &details);
|
||||
i_mmap_unlock_write(mapping);
|
||||
}
|
||||
|
||||
@@ -3393,16 +3383,17 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
|
||||
pgoff_t nr, bool even_cows)
|
||||
{
|
||||
struct zap_details details = { };
|
||||
pgoff_t first_index = start;
|
||||
pgoff_t last_index = start + nr - 1;
|
||||
|
||||
details.check_mapping = even_cows ? NULL : mapping;
|
||||
details.first_index = start;
|
||||
details.last_index = start + nr - 1;
|
||||
if (details.last_index < details.first_index)
|
||||
details.last_index = ULONG_MAX;
|
||||
details.zap_mapping = even_cows ? NULL : mapping;
|
||||
if (last_index < first_index)
|
||||
last_index = ULONG_MAX;
|
||||
|
||||
i_mmap_lock_write(mapping);
|
||||
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
|
||||
unmap_mapping_range_tree(&mapping->i_mmap, &details);
|
||||
unmap_mapping_range_tree(&mapping->i_mmap, first_index,
|
||||
last_index, &details);
|
||||
i_mmap_unlock_write(mapping);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unmap_mapping_pages);
|
||||
@@ -3490,7 +3481,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
|
||||
vm_fault_t ret = 0;
|
||||
void *shadow = NULL;
|
||||
|
||||
if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
|
||||
if (!pte_unmap_same(vmf))
|
||||
goto out;
|
||||
|
||||
entry = pte_to_swp_entry(vmf->orig_pte);
|
||||
@@ -3857,7 +3848,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
|
||||
vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
|
||||
if (!vmf->prealloc_pte)
|
||||
return VM_FAULT_OOM;
|
||||
smp_wmb(); /* See comment in __pte_alloc() */
|
||||
}
|
||||
|
||||
ret = vma->vm_ops->fault(vmf);
|
||||
@@ -3928,7 +3918,6 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
|
||||
vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
|
||||
if (!vmf->prealloc_pte)
|
||||
return VM_FAULT_OOM;
|
||||
smp_wmb(); /* See comment in __pte_alloc() */
|
||||
}
|
||||
|
||||
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
|
||||
@@ -4041,17 +4030,10 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (vmf->prealloc_pte) {
|
||||
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
|
||||
if (likely(pmd_none(*vmf->pmd))) {
|
||||
mm_inc_nr_ptes(vma->vm_mm);
|
||||
pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
|
||||
vmf->prealloc_pte = NULL;
|
||||
}
|
||||
spin_unlock(vmf->ptl);
|
||||
} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
|
||||
if (vmf->prealloc_pte)
|
||||
pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
|
||||
else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
|
||||
return VM_FAULT_OOM;
|
||||
}
|
||||
}
|
||||
|
||||
/* See comment in handle_pte_fault() */
|
||||
@@ -4160,7 +4142,6 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
|
||||
vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
|
||||
if (!vmf->prealloc_pte)
|
||||
return VM_FAULT_OOM;
|
||||
smp_wmb(); /* See comment in __pte_alloc() */
|
||||
}
|
||||
|
||||
return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
|
||||
@@ -4835,13 +4816,13 @@ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
smp_wmb(); /* See comment in __pte_alloc */
|
||||
|
||||
spin_lock(&mm->page_table_lock);
|
||||
if (pgd_present(*pgd)) /* Another has populated it */
|
||||
if (pgd_present(*pgd)) { /* Another has populated it */
|
||||
p4d_free(mm, new);
|
||||
else
|
||||
} else {
|
||||
smp_wmb(); /* See comment in pmd_install() */
|
||||
pgd_populate(mm, pgd, new);
|
||||
}
|
||||
spin_unlock(&mm->page_table_lock);
|
||||
return 0;
|
||||
}
|
||||
@@ -4858,11 +4839,10 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
smp_wmb(); /* See comment in __pte_alloc */
|
||||
|
||||
spin_lock(&mm->page_table_lock);
|
||||
if (!p4d_present(*p4d)) {
|
||||
mm_inc_nr_puds(mm);
|
||||
smp_wmb(); /* See comment in pmd_install() */
|
||||
p4d_populate(mm, p4d, new);
|
||||
} else /* Another has populated it */
|
||||
pud_free(mm, new);
|
||||
@@ -4883,14 +4863,14 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
smp_wmb(); /* See comment in __pte_alloc */
|
||||
|
||||
ptl = pud_lock(mm, pud);
|
||||
if (!pud_present(*pud)) {
|
||||
mm_inc_nr_pmds(mm);
|
||||
smp_wmb(); /* See comment in pmd_install() */
|
||||
pud_populate(mm, pud, new);
|
||||
} else /* Another has populated it */
|
||||
} else { /* Another has populated it */
|
||||
pmd_free(mm, new);
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
return 0;
|
||||
}
|
||||
@@ -5427,7 +5407,6 @@ long copy_huge_page_from_user(struct page *dst_page,
|
||||
unsigned int pages_per_huge_page,
|
||||
bool allow_pagefault)
|
||||
{
|
||||
void *src = (void *)usr_src;
|
||||
void *page_kaddr;
|
||||
unsigned long i, rc = 0;
|
||||
unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
|
||||
@@ -5440,8 +5419,7 @@ long copy_huge_page_from_user(struct page *dst_page,
|
||||
else
|
||||
page_kaddr = kmap_atomic(subpage);
|
||||
rc = copy_from_user(page_kaddr,
|
||||
(const void __user *)(src + i * PAGE_SIZE),
|
||||
PAGE_SIZE);
|
||||
usr_src + i * PAGE_SIZE, PAGE_SIZE);
|
||||
if (allow_pagefault)
|
||||
kunmap(subpage);
|
||||
else
|
||||
|
||||
@@ -3342,7 +3342,7 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
|
||||
|
||||
void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
|
||||
{
|
||||
mm->total_vm += npages;
|
||||
WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);
|
||||
|
||||
if (is_exec_mapping(flags))
|
||||
mm->exec_vm += npages;
|
||||
|
||||
@@ -563,7 +563,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
|
||||
error = -ENOMEM;
|
||||
if (!vma)
|
||||
goto out;
|
||||
prev = vma->vm_prev;
|
||||
|
||||
if (unlikely(grows & PROT_GROWSDOWN)) {
|
||||
if (vma->vm_start >= end)
|
||||
goto out;
|
||||
@@ -581,8 +581,11 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (start > vma->vm_start)
|
||||
prev = vma;
|
||||
else
|
||||
prev = vma->vm_prev;
|
||||
|
||||
for (nstart = start ; ; ) {
|
||||
unsigned long mask_off_old_flags;
|
||||
|
||||
50
mm/mremap.c
50
mm/mremap.c
@@ -565,6 +565,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
|
||||
bool *locked, unsigned long flags,
|
||||
struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
|
||||
{
|
||||
long to_account = new_len - old_len;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
struct vm_area_struct *new_vma;
|
||||
unsigned long vm_flags = vma->vm_flags;
|
||||
@@ -583,6 +584,9 @@ static unsigned long move_vma(struct vm_area_struct *vma,
|
||||
if (mm->map_count >= sysctl_max_map_count - 3)
|
||||
return -ENOMEM;
|
||||
|
||||
if (unlikely(flags & MREMAP_DONTUNMAP))
|
||||
to_account = new_len;
|
||||
|
||||
if (vma->vm_ops && vma->vm_ops->may_split) {
|
||||
if (vma->vm_start != old_addr)
|
||||
err = vma->vm_ops->may_split(vma, old_addr);
|
||||
@@ -604,8 +608,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT)) {
|
||||
if (security_vm_enough_memory_mm(mm, new_len >> PAGE_SHIFT))
|
||||
if (vm_flags & VM_ACCOUNT) {
|
||||
if (security_vm_enough_memory_mm(mm, to_account >> PAGE_SHIFT))
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -613,8 +617,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
|
||||
new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
|
||||
&need_rmap_locks);
|
||||
if (!new_vma) {
|
||||
if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT))
|
||||
vm_unacct_memory(new_len >> PAGE_SHIFT);
|
||||
if (vm_flags & VM_ACCOUNT)
|
||||
vm_unacct_memory(to_account >> PAGE_SHIFT);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -708,8 +712,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
|
||||
}
|
||||
|
||||
static struct vm_area_struct *vma_to_resize(unsigned long addr,
|
||||
unsigned long old_len, unsigned long new_len, unsigned long flags,
|
||||
unsigned long *p)
|
||||
unsigned long old_len, unsigned long new_len, unsigned long flags)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *vma;
|
||||
@@ -768,13 +771,6 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
|
||||
(new_len - old_len) >> PAGE_SHIFT))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (vma->vm_flags & VM_ACCOUNT) {
|
||||
unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
|
||||
if (security_vm_enough_memory_mm(mm, charged))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
*p = charged;
|
||||
}
|
||||
|
||||
return vma;
|
||||
}
|
||||
|
||||
@@ -787,7 +783,6 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long ret = -EINVAL;
|
||||
unsigned long charged = 0;
|
||||
unsigned long map_flags = 0;
|
||||
|
||||
if (offset_in_page(new_addr))
|
||||
@@ -830,7 +825,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
|
||||
old_len = new_len;
|
||||
}
|
||||
|
||||
vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
|
||||
vma = vma_to_resize(addr, old_len, new_len, flags);
|
||||
if (IS_ERR(vma)) {
|
||||
ret = PTR_ERR(vma);
|
||||
goto out;
|
||||
@@ -853,7 +848,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
|
||||
((addr - vma->vm_start) >> PAGE_SHIFT),
|
||||
map_flags);
|
||||
if (IS_ERR_VALUE(ret))
|
||||
goto out1;
|
||||
goto out;
|
||||
|
||||
/* We got a new mapping */
|
||||
if (!(flags & MREMAP_FIXED))
|
||||
@@ -862,12 +857,6 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
|
||||
ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
|
||||
uf_unmap);
|
||||
|
||||
if (!(offset_in_page(ret)))
|
||||
goto out;
|
||||
|
||||
out1:
|
||||
vm_unacct_memory(charged);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
@@ -899,7 +888,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long ret = -EINVAL;
|
||||
unsigned long charged = 0;
|
||||
bool locked = false;
|
||||
bool downgraded = false;
|
||||
struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
|
||||
@@ -981,7 +969,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
|
||||
/*
|
||||
* Ok, we need to grow..
|
||||
*/
|
||||
vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
|
||||
vma = vma_to_resize(addr, old_len, new_len, flags);
|
||||
if (IS_ERR(vma)) {
|
||||
ret = PTR_ERR(vma);
|
||||
goto out;
|
||||
@@ -992,10 +980,18 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
|
||||
if (old_len == vma->vm_end - addr) {
|
||||
/* can we just expand the current mapping? */
|
||||
if (vma_expandable(vma, new_len - old_len)) {
|
||||
int pages = (new_len - old_len) >> PAGE_SHIFT;
|
||||
long pages = (new_len - old_len) >> PAGE_SHIFT;
|
||||
|
||||
if (vma->vm_flags & VM_ACCOUNT) {
|
||||
if (security_vm_enough_memory_mm(mm, pages)) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (vma_adjust(vma, vma->vm_start, addr + new_len,
|
||||
vma->vm_pgoff, NULL)) {
|
||||
vm_unacct_memory(pages);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@@ -1034,10 +1030,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
|
||||
&locked, flags, &uf, &uf_unmap);
|
||||
}
|
||||
out:
|
||||
if (offset_in_page(ret)) {
|
||||
vm_unacct_memory(charged);
|
||||
if (offset_in_page(ret))
|
||||
locked = false;
|
||||
}
|
||||
if (downgraded)
|
||||
mmap_read_unlock(current->mm);
|
||||
else
|
||||
|
||||
@@ -1120,27 +1120,24 @@ bool out_of_memory(struct oom_control *oc)
|
||||
}
|
||||
|
||||
/*
|
||||
* The pagefault handler calls here because it is out of memory, so kill a
|
||||
* memory-hogging task. If oom_lock is held by somebody else, a parallel oom
|
||||
* killing is already in progress so do nothing.
|
||||
* The pagefault handler calls here because some allocation has failed. We have
|
||||
* to take care of the memcg OOM here because this is the only safe context without
|
||||
* any locks held but let the oom killer triggered from the allocation context care
|
||||
* about the global OOM.
|
||||
*/
|
||||
void pagefault_out_of_memory(void)
|
||||
{
|
||||
struct oom_control oc = {
|
||||
.zonelist = NULL,
|
||||
.nodemask = NULL,
|
||||
.memcg = NULL,
|
||||
.gfp_mask = 0,
|
||||
.order = 0,
|
||||
};
|
||||
static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL,
|
||||
DEFAULT_RATELIMIT_BURST);
|
||||
|
||||
if (mem_cgroup_oom_synchronize(true))
|
||||
return;
|
||||
|
||||
if (!mutex_trylock(&oom_lock))
|
||||
if (fatal_signal_pending(current))
|
||||
return;
|
||||
out_of_memory(&oc);
|
||||
mutex_unlock(&oom_lock);
|
||||
|
||||
if (__ratelimit(&pfoom_rs))
|
||||
pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n");
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
|
||||
|
||||
@@ -201,7 +201,7 @@ fail:
|
||||
panic("Out of memory");
|
||||
}
|
||||
|
||||
#else /* CONFIG_FLATMEM */
|
||||
#else /* CONFIG_SPARSEMEM */
|
||||
|
||||
struct page_ext *lookup_page_ext(const struct page *page)
|
||||
{
|
||||
|
||||
@@ -2425,7 +2425,6 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
|
||||
shmem_recalc_inode(inode);
|
||||
spin_unlock_irq(&info->lock);
|
||||
|
||||
SetPageDirty(page);
|
||||
unlock_page(page);
|
||||
return 0;
|
||||
out_delete_from_cache:
|
||||
|
||||
@@ -76,7 +76,7 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start,
|
||||
set_pte_at(&init_mm, addr, pte, entry);
|
||||
}
|
||||
|
||||
/* Make pte visible before pmd. See comment in __pte_alloc(). */
|
||||
/* Make pte visible before pmd. See comment in pmd_install(). */
|
||||
smp_wmb();
|
||||
pmd_populate_kernel(&init_mm, pmd, pgtable);
|
||||
|
||||
|
||||
23
mm/swap.c
23
mm/swap.c
@@ -135,18 +135,27 @@ EXPORT_SYMBOL(__put_page);
|
||||
* put_pages_list() - release a list of pages
|
||||
* @pages: list of pages threaded on page->lru
|
||||
*
|
||||
* Release a list of pages which are strung together on page.lru. Currently
|
||||
* used by read_cache_pages() and related error recovery code.
|
||||
* Release a list of pages which are strung together on page.lru.
|
||||
*/
|
||||
void put_pages_list(struct list_head *pages)
|
||||
{
|
||||
while (!list_empty(pages)) {
|
||||
struct page *victim;
|
||||
struct page *page, *next;
|
||||
|
||||
victim = lru_to_page(pages);
|
||||
list_del(&victim->lru);
|
||||
put_page(victim);
|
||||
list_for_each_entry_safe(page, next, pages, lru) {
|
||||
if (!put_page_testzero(page)) {
|
||||
list_del(&page->lru);
|
||||
continue;
|
||||
}
|
||||
if (PageHead(page)) {
|
||||
list_del(&page->lru);
|
||||
__put_compound_page(page);
|
||||
continue;
|
||||
}
|
||||
/* Cannot be PageLRU because it's passed to us using the lru */
|
||||
__ClearPageWaiters(page);
|
||||
}
|
||||
|
||||
free_unref_page_list(pages);
|
||||
}
|
||||
EXPORT_SYMBOL(put_pages_list);
|
||||
|
||||
|
||||
@@ -2763,7 +2763,7 @@ static int swap_show(struct seq_file *swap, void *v)
|
||||
struct swap_info_struct *si = v;
|
||||
struct file *file;
|
||||
int len;
|
||||
unsigned int bytes, inuse;
|
||||
unsigned long bytes, inuse;
|
||||
|
||||
if (si == SEQ_START_TOKEN) {
|
||||
seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n");
|
||||
@@ -2775,7 +2775,7 @@ static int swap_show(struct seq_file *swap, void *v)
|
||||
|
||||
file = si->swap_file;
|
||||
len = seq_file_path(swap, file, " \t\n\\");
|
||||
seq_printf(swap, "%*s%s\t%u\t%s%u\t%s%d\n",
|
||||
seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n",
|
||||
len < 40 ? 40 - len : 1, " ",
|
||||
S_ISBLK(file_inode(file)->i_mode) ?
|
||||
"partition" : "file\t",
|
||||
@@ -3118,7 +3118,7 @@ static bool swap_discardable(struct swap_info_struct *si)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(si->bdev);
|
||||
|
||||
if (!q || !blk_queue_discard(q))
|
||||
if (!blk_queue_discard(q))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
||||
@@ -69,10 +69,9 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
|
||||
pgoff_t offset, max_off;
|
||||
|
||||
_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
|
||||
_dst_pte = pte_mkdirty(_dst_pte);
|
||||
if (page_in_cache && !vm_shared)
|
||||
writable = false;
|
||||
if (writable || !page_in_cache)
|
||||
_dst_pte = pte_mkdirty(_dst_pte);
|
||||
if (writable) {
|
||||
if (wp_copy)
|
||||
_dst_pte = pte_mkuffd_wp(_dst_pte);
|
||||
|
||||
74
mm/vmalloc.c
74
mm/vmalloc.c
@@ -1195,18 +1195,14 @@ find_vmap_lowest_match(unsigned long size,
|
||||
{
|
||||
struct vmap_area *va;
|
||||
struct rb_node *node;
|
||||
unsigned long length;
|
||||
|
||||
/* Start from the root. */
|
||||
node = free_vmap_area_root.rb_node;
|
||||
|
||||
/* Adjust the search size for alignment overhead. */
|
||||
length = size + align - 1;
|
||||
|
||||
while (node) {
|
||||
va = rb_entry(node, struct vmap_area, rb_node);
|
||||
|
||||
if (get_subtree_max_size(node->rb_left) >= length &&
|
||||
if (get_subtree_max_size(node->rb_left) >= size &&
|
||||
vstart < va->va_start) {
|
||||
node = node->rb_left;
|
||||
} else {
|
||||
@@ -1216,9 +1212,9 @@ find_vmap_lowest_match(unsigned long size,
|
||||
/*
|
||||
* Does not make sense to go deeper towards the right
|
||||
* sub-tree if it does not have a free block that is
|
||||
* equal or bigger to the requested search length.
|
||||
* equal or bigger to the requested search size.
|
||||
*/
|
||||
if (get_subtree_max_size(node->rb_right) >= length) {
|
||||
if (get_subtree_max_size(node->rb_right) >= size) {
|
||||
node = node->rb_right;
|
||||
continue;
|
||||
}
|
||||
@@ -1226,15 +1222,23 @@ find_vmap_lowest_match(unsigned long size,
|
||||
/*
|
||||
* OK. We roll back and find the first right sub-tree,
|
||||
* that will satisfy the search criteria. It can happen
|
||||
* only once due to "vstart" restriction.
|
||||
* due to "vstart" restriction or an alignment overhead
|
||||
* that is bigger than PAGE_SIZE.
|
||||
*/
|
||||
while ((node = rb_parent(node))) {
|
||||
va = rb_entry(node, struct vmap_area, rb_node);
|
||||
if (is_within_this_va(va, size, align, vstart))
|
||||
return va;
|
||||
|
||||
if (get_subtree_max_size(node->rb_right) >= length &&
|
||||
if (get_subtree_max_size(node->rb_right) >= size &&
|
||||
vstart <= va->va_start) {
|
||||
/*
|
||||
* Shift the vstart forward. Please note, we update it with
|
||||
* parent's start address adding "1" because we do not want
|
||||
* to enter same sub-tree after it has already been checked
|
||||
* and no suitable free block found there.
|
||||
*/
|
||||
vstart = va->va_start + 1;
|
||||
node = node->rb_right;
|
||||
break;
|
||||
}
|
||||
@@ -1265,7 +1269,7 @@ find_vmap_lowest_linear_match(unsigned long size,
|
||||
}
|
||||
|
||||
static void
|
||||
find_vmap_lowest_match_check(unsigned long size)
|
||||
find_vmap_lowest_match_check(unsigned long size, unsigned long align)
|
||||
{
|
||||
struct vmap_area *va_1, *va_2;
|
||||
unsigned long vstart;
|
||||
@@ -1274,8 +1278,8 @@ find_vmap_lowest_match_check(unsigned long size)
|
||||
get_random_bytes(&rnd, sizeof(rnd));
|
||||
vstart = VMALLOC_START + rnd;
|
||||
|
||||
va_1 = find_vmap_lowest_match(size, 1, vstart);
|
||||
va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
|
||||
va_1 = find_vmap_lowest_match(size, align, vstart);
|
||||
va_2 = find_vmap_lowest_linear_match(size, align, vstart);
|
||||
|
||||
if (va_1 != va_2)
|
||||
pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
|
||||
@@ -1454,7 +1458,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
|
||||
return vend;
|
||||
|
||||
#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
|
||||
find_vmap_lowest_match_check(size);
|
||||
find_vmap_lowest_match_check(size, align);
|
||||
#endif
|
||||
|
||||
return nva_start_addr;
|
||||
@@ -2272,15 +2276,21 @@ void __init vm_area_add_early(struct vm_struct *vm)
|
||||
*/
|
||||
void __init vm_area_register_early(struct vm_struct *vm, size_t align)
|
||||
{
|
||||
static size_t vm_init_off __initdata;
|
||||
unsigned long addr;
|
||||
unsigned long addr = ALIGN(VMALLOC_START, align);
|
||||
struct vm_struct *cur, **p;
|
||||
|
||||
addr = ALIGN(VMALLOC_START + vm_init_off, align);
|
||||
vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
|
||||
BUG_ON(vmap_initialized);
|
||||
|
||||
for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) {
|
||||
if ((unsigned long)cur->addr - addr >= vm->size)
|
||||
break;
|
||||
addr = ALIGN((unsigned long)cur->addr + cur->size, align);
|
||||
}
|
||||
|
||||
BUG_ON(addr > VMALLOC_END - vm->size);
|
||||
vm->addr = (void *)addr;
|
||||
|
||||
vm_area_add_early(vm);
|
||||
vm->next = *p;
|
||||
*p = vm;
|
||||
}
|
||||
|
||||
static void vmap_init_free_space(void)
|
||||
@@ -2743,6 +2753,13 @@ void *vmap(struct page **pages, unsigned int count,
|
||||
|
||||
might_sleep();
|
||||
|
||||
/*
|
||||
* Your top guard is someone else's bottom guard. Not having a top
|
||||
* guard compromises someone else's mappings too.
|
||||
*/
|
||||
if (WARN_ON_ONCE(flags & VM_NO_GUARD))
|
||||
flags &= ~VM_NO_GUARD;
|
||||
|
||||
if (count > totalram_pages())
|
||||
return NULL;
|
||||
|
||||
@@ -2860,6 +2877,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
|
||||
/* High-order pages or fallback path if "bulk" fails. */
|
||||
|
||||
while (nr_allocated < nr_pages) {
|
||||
if (fatal_signal_pending(current))
|
||||
break;
|
||||
|
||||
if (nid == NUMA_NO_NODE)
|
||||
page = alloc_pages(gfp, order);
|
||||
else
|
||||
@@ -2887,6 +2907,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
|
||||
int node)
|
||||
{
|
||||
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
|
||||
const gfp_t orig_gfp_mask = gfp_mask;
|
||||
unsigned long addr = (unsigned long)area->addr;
|
||||
unsigned long size = get_vm_area_size(area);
|
||||
unsigned long array_size;
|
||||
@@ -2907,7 +2928,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
|
||||
}
|
||||
|
||||
if (!area->pages) {
|
||||
warn_alloc(gfp_mask, NULL,
|
||||
warn_alloc(orig_gfp_mask, NULL,
|
||||
"vmalloc error: size %lu, failed to allocated page array size %lu",
|
||||
nr_small_pages * PAGE_SIZE, array_size);
|
||||
free_vm_area(area);
|
||||
@@ -2927,7 +2948,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
|
||||
* allocation request, free them via __vfree() if any.
|
||||
*/
|
||||
if (area->nr_pages != nr_small_pages) {
|
||||
warn_alloc(gfp_mask, NULL,
|
||||
warn_alloc(orig_gfp_mask, NULL,
|
||||
"vmalloc error: size %lu, page order %u, failed to allocate pages",
|
||||
area->nr_pages * PAGE_SIZE, page_order);
|
||||
goto fail;
|
||||
@@ -2935,7 +2956,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
|
||||
|
||||
if (vmap_pages_range(addr, addr + size, prot, area->pages,
|
||||
page_shift) < 0) {
|
||||
warn_alloc(gfp_mask, NULL,
|
||||
warn_alloc(orig_gfp_mask, NULL,
|
||||
"vmalloc error: size %lu, failed to map pages",
|
||||
area->nr_pages * PAGE_SIZE);
|
||||
goto fail;
|
||||
@@ -3856,6 +3877,7 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_NUMA)) {
|
||||
unsigned int nr, *counters = m->private;
|
||||
unsigned int step = 1U << vm_area_page_order(v);
|
||||
|
||||
if (!counters)
|
||||
return;
|
||||
@@ -3867,9 +3889,8 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
|
||||
|
||||
memset(counters, 0, nr_node_ids * sizeof(unsigned int));
|
||||
|
||||
for (nr = 0; nr < v->nr_pages; nr++)
|
||||
counters[page_to_nid(v->pages[nr])]++;
|
||||
|
||||
for (nr = 0; nr < v->nr_pages; nr += step)
|
||||
counters[page_to_nid(v->pages[nr])] += step;
|
||||
for_each_node_state(nr, N_HIGH_MEMORY)
|
||||
if (counters[nr])
|
||||
seq_printf(m, " N%u=%u", nr, counters[nr]);
|
||||
@@ -3905,7 +3926,7 @@ static int s_show(struct seq_file *m, void *p)
|
||||
(void *)va->va_start, (void *)va->va_end,
|
||||
va->va_end - va->va_start);
|
||||
|
||||
return 0;
|
||||
goto final;
|
||||
}
|
||||
|
||||
v = va->vm;
|
||||
@@ -3946,6 +3967,7 @@ static int s_show(struct seq_file *m, void *p)
|
||||
/*
|
||||
* As a final step, dump "unpurged" areas.
|
||||
*/
|
||||
final:
|
||||
if (list_is_last(&va->list, &vmap_area_list))
|
||||
show_purge_info(m);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user