Merge 0eb68437a7 ("vmalloc: choose a better start address in vm_area_register_early()") into android-mainline

Steps on the way to 5.16-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ia521da34dd6bf9f4c3e3927228f34230a986d901
This commit is contained in:
Greg Kroah-Hartman
2021-11-22 11:07:00 +01:00
34 changed files with 429 additions and 489 deletions

View File

@@ -87,10 +87,8 @@ Brief summary of control files.
memory.oom_control set/show oom controls.
memory.numa_stat show the number of memory usage per numa
node
memory.kmem.limit_in_bytes set/show hard limit for kernel memory
This knob is deprecated and shouldn't be
used. It is planned that this be removed in
the foreseeable future.
memory.kmem.limit_in_bytes This knob is deprecated and writing to
it will return -ENOTSUPP.
memory.kmem.usage_in_bytes show current kernel memory allocation
memory.kmem.failcnt show the number of kernel memory usage
hits limits
@@ -518,11 +516,6 @@ will be charged as a new owner of it.
charged file caches. Some out-of-use page caches may keep charged until
memory pressure happens. If you want to avoid that, force_empty will be useful.
Also, note that when memory.kmem.limit_in_bytes is set the charges due to
kernel pages will still be seen. This is not considered a failure and the
write will still return success. In this case, it is expected that
memory.kmem.usage_in_bytes == memory.usage_in_bytes.
5.2 stat file
-------------

View File

@@ -2409,6 +2409,7 @@ static void __exit cleanup_mtd(void)
if (proc_mtd)
remove_proc_entry("mtd", NULL);
class_unregister(&mtd_class);
bdi_unregister(mtd_bdi);
bdi_put(mtd_bdi);
idr_destroy(&mtd_idr);
}

View File

@@ -476,6 +476,8 @@ void generic_shutdown_super(struct super_block *sb)
spin_unlock(&sb_lock);
up_write(&sb->s_umount);
if (sb->s_bdi != &noop_backing_dev_info) {
if (sb->s_iflags & SB_I_PERSB_BDI)
bdi_unregister(sb->s_bdi);
bdi_put(sb->s_bdi);
sb->s_bdi = &noop_backing_dev_info;
}
@@ -1562,6 +1564,7 @@ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
}
WARN_ON(sb->s_bdi != &noop_backing_dev_info);
sb->s_bdi = bdi;
sb->s_iflags |= SB_I_PERSB_BDI;
return 0;
}

View File

@@ -103,6 +103,9 @@ struct wb_completion {
* change as blkcg is disabled and enabled higher up in the hierarchy, a wb
* is tested for blkcg after lookup and removed from index on mismatch so
* that a new wb for the combination can be created.
*
* Each bdi_writeback that is not embedded into the backing_dev_info must hold
* a reference to the parent backing_dev_info. See cgwb_create() for details.
*/
struct bdi_writeback {
struct backing_dev_info *bdi; /* our parent bdi */

View File

@@ -1440,6 +1440,7 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_I_UNTRUSTED_MOUNTER 0x00000040
#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
/* Possible states of 'frozen' field */
enum {

View File

@@ -609,9 +609,9 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1);
void free_pages_exact(void *virt, size_t size);
void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(1);
#define __get_free_page(gfp_mask) \
__get_free_pages((gfp_mask), 0)

View File

@@ -132,13 +132,7 @@ io_mapping_init_wc(struct io_mapping *iomap,
iomap->base = base;
iomap->size = size;
#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */
iomap->prot = pgprot_noncached_wc(PAGE_KERNEL);
#elif defined(pgprot_writecombine)
iomap->prot = pgprot_writecombine(PAGE_KERNEL);
#else
iomap->prot = pgprot_noncached(PAGE_KERNEL);
#endif
return iomap;
}

View File

@@ -180,12 +180,6 @@ struct mem_cgroup_thresholds {
struct mem_cgroup_threshold_ary *spare;
};
enum memcg_kmem_state {
KMEM_NONE,
KMEM_ALLOCATED,
KMEM_ONLINE,
};
#if defined(CONFIG_SMP)
struct memcg_padding {
char x[0];
@@ -318,7 +312,6 @@ struct mem_cgroup {
#ifdef CONFIG_MEMCG_KMEM
int kmemcg_id;
enum memcg_kmem_state kmem_state;
struct obj_cgroup __rcu *objcg;
struct list_head objcg_list; /* list of inherited objcgs */
#endif

View File

@@ -140,7 +140,6 @@ typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
extern int walk_memory_blocks(unsigned long start, unsigned long size,
void *arg, walk_memory_blocks_func_t func);
extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
extern int memory_group_register_static(int nid, unsigned long max_pages);
extern int memory_group_register_dynamic(int nid, unsigned long unit_pages);

View File

@@ -1827,12 +1827,24 @@ extern void user_shm_unlock(size_t, struct ucounts *);
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
struct address_space *zap_mapping; /* Check page->mapping if set */
struct page *single_page; /* Locked page to be unmapped */
};
/*
* We set details->zap_mappings when we want to unmap shared but keep private
* pages. Return true if skip zapping this page, false otherwise.
*/
static inline bool
zap_skip_check_mapping(struct zap_details *details, struct page *page)
{
if (!details || !page)
return false;
return details->zap_mapping &&
(details->zap_mapping != page_rmapping(page));
}
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,

View File

@@ -1479,7 +1479,7 @@ static inline int pfn_valid(unsigned long pfn)
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
ms = __nr_to_section(pfn_to_section_nr(pfn));
ms = __pfn_to_section(pfn);
if (!valid_section(ms))
return 0;
/*
@@ -1494,7 +1494,7 @@ static inline int pfn_in_present_section(unsigned long pfn)
{
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
return present_section(__pfn_to_section(pfn));
}
static inline unsigned long next_present_section_nr(unsigned long section_nr)

View File

@@ -123,7 +123,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
pcpu_fc_populate_pte_fn_t populate_pte_fn);
#endif
extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr);
extern bool is_kernel_percpu_address(unsigned long addr);
@@ -131,8 +131,8 @@ extern bool is_kernel_percpu_address(unsigned long addr);
extern void __init setup_per_cpu_areas(void);
#endif
extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp);
extern void __percpu *__alloc_percpu(size_t size, size_t align);
extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1);
extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1);
extern void free_percpu(void __percpu *__pdata);
extern phys_addr_t per_cpu_ptr_to_phys(void *addr);

View File

@@ -181,7 +181,7 @@ int kmem_cache_shrink(struct kmem_cache *s);
/*
* Common kmalloc functions provided by all allocators
*/
void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags);
void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2);
void kfree(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);
@@ -425,7 +425,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
#define kmalloc_index(s) __kmalloc_index(s, true)
#endif /* !CONFIG_SLOB */
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc;
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *s, void *objp);
@@ -449,11 +449,12 @@ static __always_inline void kfree_bulk(size_t size, void **p)
}
#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc;
void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
__alloc_size(1);
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
__malloc;
#else
static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
return __kmalloc(size, flags);
}
@@ -466,23 +467,23 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f
#ifdef CONFIG_TRACING
extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
__assume_slab_alignment __malloc;
__assume_slab_alignment __alloc_size(3);
#ifdef CONFIG_NUMA
extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size) __assume_slab_alignment __malloc;
int node, size_t size) __assume_slab_alignment
__alloc_size(4);
#else
static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
gfp_t gfpflags, int node,
size_t size)
static __always_inline __alloc_size(4) void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
gfp_t gfpflags, int node, size_t size)
{
return kmem_cache_alloc_trace(s, gfpflags, size);
}
#endif /* CONFIG_NUMA */
#else /* CONFIG_TRACING */
static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags,
size_t size)
static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s,
gfp_t flags, size_t size)
{
void *ret = kmem_cache_alloc(s, flags);
@@ -501,19 +502,20 @@ static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, g
#endif /* CONFIG_TRACING */
extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment
__malloc;
__alloc_size(1);
#ifdef CONFIG_TRACING
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
__assume_page_alignment __malloc;
__assume_page_alignment __alloc_size(1);
#else
static __always_inline void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags,
unsigned int order)
{
return kmalloc_order(size, flags, order);
}
#endif
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags)
{
unsigned int order = get_order(size);
return kmalloc_order_trace(size, flags, order);
@@ -573,7 +575,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
* Try really hard to succeed the allocation but fail
* eventually.
*/
static __always_inline void *kmalloc(size_t size, gfp_t flags)
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size)) {
#ifndef CONFIG_SLOB
@@ -595,7 +597,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
return __kmalloc(size, flags);
}
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
#ifndef CONFIG_SLOB
if (__builtin_constant_p(size) &&
@@ -619,7 +621,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
* @size: element size.
* @flags: the type of memory to allocate (see kmalloc).
*/
static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags)
{
size_t bytes;
@@ -637,8 +639,10 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
* @new_size: new size of a single member of the array
* @flags: the type of memory to allocate (see kmalloc)
*/
static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t new_size,
gfp_t flags)
static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p,
size_t new_n,
size_t new_size,
gfp_t flags)
{
size_t bytes;
@@ -654,7 +658,7 @@ static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t n
* @size: element size.
* @flags: the type of memory to allocate (see kmalloc).
*/
static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags)
{
return kmalloc_array(n, size, flags | __GFP_ZERO);
}
@@ -667,12 +671,13 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
* allocator where we care about the real place the memory allocation
* request comes from.
*/
extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller);
extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
__alloc_size(1);
#define kmalloc_track_caller(size, flags) \
__kmalloc_track_caller(size, flags, _RET_IP_)
static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
int node)
static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
int node)
{
size_t bytes;
@@ -683,7 +688,7 @@ static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
return __kmalloc_node(bytes, flags, node);
}
static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
{
return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
}
@@ -691,7 +696,7 @@ static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
#ifdef CONFIG_NUMA
extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
unsigned long caller);
unsigned long caller) __alloc_size(1);
#define kmalloc_node_track_caller(size, flags, node) \
__kmalloc_node_track_caller(size, flags, node, \
_RET_IP_)
@@ -716,7 +721,7 @@ static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
* @size: how many bytes of memory are required.
* @flags: the type of memory to allocate (see kmalloc).
*/
static inline void *kzalloc(size_t size, gfp_t flags)
static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags)
{
return kmalloc(size, flags | __GFP_ZERO);
}
@@ -727,26 +732,26 @@ static inline void *kzalloc(size_t size, gfp_t flags)
* @flags: the type of memory to allocate (see kmalloc).
* @node: memory node from which to allocate
*/
static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node)
{
return kmalloc_node(size, flags | __GFP_ZERO, node);
}
extern void *kvmalloc_node(size_t size, gfp_t flags, int node);
static inline void *kvmalloc(size_t size, gfp_t flags)
extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1);
static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags)
{
return kvmalloc_node(size, flags, NUMA_NO_NODE);
}
static inline void *kvzalloc_node(size_t size, gfp_t flags, int node)
static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node)
{
return kvmalloc_node(size, flags | __GFP_ZERO, node);
}
static inline void *kvzalloc(size_t size, gfp_t flags)
static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags)
{
return kvmalloc(size, flags | __GFP_ZERO);
}
static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
{
size_t bytes;
@@ -756,13 +761,13 @@ static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
return kvmalloc(bytes, flags);
}
static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags)
{
return kvmalloc_array(n, size, flags | __GFP_ZERO);
}
extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize,
gfp_t flags);
extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
__alloc_size(3);
extern void kvfree(const void *addr);
extern void kvfree_sensitive(const void *addr, size_t len);

View File

@@ -22,7 +22,7 @@ struct notifier_block; /* in notifier.h */
#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */
#define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
#define VM_NO_GUARD 0x00000040 /* ***DANGEROUS*** don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */
#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
@@ -136,21 +136,21 @@ static inline void vmalloc_init(void)
static inline unsigned long vmalloc_nr_pages(void) { return 0; }
#endif
extern void *vmalloc(unsigned long size);
extern void *vzalloc(unsigned long size);
extern void *vmalloc_user(unsigned long size);
extern void *vmalloc_node(unsigned long size, int node);
extern void *vzalloc_node(unsigned long size, int node);
extern void *vmalloc_32(unsigned long size);
extern void *vmalloc_32_user(unsigned long size);
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask);
extern void *vmalloc(unsigned long size) __alloc_size(1);
extern void *vzalloc(unsigned long size) __alloc_size(1);
extern void *vmalloc_user(unsigned long size) __alloc_size(1);
extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1);
extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1);
extern void *vmalloc_32(unsigned long size) __alloc_size(1);
extern void *vmalloc_32_user(unsigned long size) __alloc_size(1);
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller);
const void *caller) __alloc_size(1);
void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
int node, const void *caller);
void *vmalloc_no_huge(unsigned long size);
int node, const void *caller) __alloc_size(1);
void *vmalloc_no_huge(unsigned long size) __alloc_size(1);
extern void vfree(const void *addr);
extern void vfree_atomic(const void *addr);

View File

@@ -13,7 +13,7 @@ struct mm_struct;
extern int trace_mmap_lock_reg(void);
extern void trace_mmap_lock_unreg(void);
TRACE_EVENT_FN(mmap_lock_start_locking,
DECLARE_EVENT_CLASS(mmap_lock,
TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),
@@ -32,15 +32,23 @@ TRACE_EVENT_FN(mmap_lock_start_locking,
),
TP_printk(
"mm=%p memcg_path=%s write=%s\n",
"mm=%p memcg_path=%s write=%s",
__entry->mm,
__get_str(memcg_path),
__entry->write ? "true" : "false"
),
trace_mmap_lock_reg, trace_mmap_lock_unreg
)
);
#define DEFINE_MMAP_LOCK_EVENT(name) \
DEFINE_EVENT_FN(mmap_lock, name, \
TP_PROTO(struct mm_struct *mm, const char *memcg_path, \
bool write), \
TP_ARGS(mm, memcg_path, write), \
trace_mmap_lock_reg, trace_mmap_lock_unreg)
DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking);
DEFINE_MMAP_LOCK_EVENT(mmap_lock_released);
TRACE_EVENT_FN(mmap_lock_acquire_returned,
TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write,
@@ -63,7 +71,7 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned,
),
TP_printk(
"mm=%p memcg_path=%s write=%s success=%s\n",
"mm=%p memcg_path=%s write=%s success=%s",
__entry->mm,
__get_str(memcg_path),
__entry->write ? "true" : "false",
@@ -73,34 +81,6 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned,
trace_mmap_lock_reg, trace_mmap_lock_unreg
);
TRACE_EVENT_FN(mmap_lock_released,
TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),
TP_ARGS(mm, memcg_path, write),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__string(memcg_path, memcg_path)
__field(bool, write)
),
TP_fast_assign(
__entry->mm = mm;
__assign_str(memcg_path, memcg_path);
__entry->write = write;
),
TP_printk(
"mm=%p memcg_path=%s write=%s\n",
__entry->mm,
__get_str(memcg_path),
__entry->write ? "true" : "false"
),
trace_mmap_lock_reg, trace_mmap_lock_unreg
);
#endif /* _TRACE_MMAP_LOCK_H */
/* This part must be outside protection */

View File

@@ -137,7 +137,7 @@ static void __acct_update_integrals(struct task_struct *tsk,
* the rest of the math is done in xacct_add_tsk.
*/
tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10;
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10;
tsk->acct_vm_mem1 += delta * READ_ONCE(tsk->mm->total_vm) >> 10;
}
/**

View File

@@ -292,8 +292,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
memset(wb, 0, sizeof(*wb));
if (wb != &bdi->wb)
bdi_get(bdi);
wb->bdi = bdi;
wb->last_old_flush = jiffies;
INIT_LIST_HEAD(&wb->b_dirty);
@@ -317,7 +315,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
goto out_put_bdi;
return err;
for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
err = percpu_counter_init(&wb->stat[i], 0, gfp);
@@ -331,9 +329,6 @@ out_destroy_stat:
while (i--)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
out_put_bdi:
if (wb != &bdi->wb)
bdi_put(bdi);
return err;
}
@@ -374,8 +369,6 @@ static void wb_exit(struct bdi_writeback *wb)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
if (wb != &wb->bdi->wb)
bdi_put(wb->bdi);
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -398,6 +391,7 @@ static void cgwb_release_workfn(struct work_struct *work)
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
release_work);
struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
struct backing_dev_info *bdi = wb->bdi;
mutex_lock(&wb->bdi->cgwb_release_mutex);
wb_shutdown(wb);
@@ -417,6 +411,7 @@ static void cgwb_release_workfn(struct work_struct *work)
percpu_ref_exit(&wb->refcnt);
wb_exit(wb);
bdi_put(bdi);
WARN_ON_ONCE(!list_empty(&wb->b_attached));
kfree_rcu(wb, rcu);
}
@@ -498,6 +493,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
INIT_LIST_HEAD(&wb->b_attached);
INIT_WORK(&wb->release_work, cgwb_release_workfn);
set_bit(WB_registered, &wb->state);
bdi_get(bdi);
/*
* The root wb determines the registered state of the whole bdi and
@@ -529,6 +525,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
goto out_put;
err_fprop_exit:
bdi_put(bdi);
fprop_local_destroy_percpu(&wb->memcg_completions);
err_ref_exit:
percpu_ref_exit(&wb->refcnt);
@@ -959,14 +956,14 @@ void bdi_unregister(struct backing_dev_info *bdi)
bdi->owner = NULL;
}
}
EXPORT_SYMBOL(bdi_unregister);
static void release_bdi(struct kref *ref)
{
struct backing_dev_info *bdi =
container_of(ref, struct backing_dev_info, refcnt);
if (test_bit(WB_registered, &bdi->wb.state))
bdi_unregister(bdi);
WARN_ON_ONCE(test_bit(WB_registered, &bdi->wb.state));
WARN_ON_ONCE(bdi->dev);
wb_exit(&bdi->wb);
kfree(bdi);

View File

@@ -638,6 +638,30 @@ static bool mapping_needs_writeback(struct address_space *mapping)
return mapping->nrpages;
}
static bool filemap_range_has_writeback(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
pgoff_t max = end_byte >> PAGE_SHIFT;
struct page *page;
if (end_byte < start_byte)
return false;
rcu_read_lock();
xas_for_each(&xas, page, max) {
if (xas_retry(&xas, page))
continue;
if (xa_is_value(page))
continue;
if (PageDirty(page) || PageLocked(page) || PageWriteback(page))
break;
}
rcu_read_unlock();
return page != NULL;
}
/**
* filemap_range_needs_writeback - check if range potentially needs writeback
* @mapping: address space within which to check
@@ -655,29 +679,12 @@ static bool mapping_needs_writeback(struct address_space *mapping)
bool filemap_range_needs_writeback(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
pgoff_t max = end_byte >> PAGE_SHIFT;
struct page *page;
if (!mapping_needs_writeback(mapping))
return false;
if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
return false;
if (end_byte < start_byte)
return false;
rcu_read_lock();
xas_for_each(&xas, page, max) {
if (xas_retry(&xas, page))
continue;
if (xa_is_value(page))
continue;
if (PageDirty(page) || PageLocked(page) || PageWriteback(page))
break;
}
rcu_read_unlock();
return page != NULL;
return filemap_range_has_writeback(mapping, start_byte, end_byte);
}
EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
@@ -2088,7 +2095,6 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
if (!xa_is_value(page)) {
if (page->index < start)
goto put;
VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
if (page->index + thp_nr_pages(page) - 1 > end)
goto put;
if (!trylock_page(page))
@@ -2621,6 +2627,9 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
if ((iocb->ki_flags & IOCB_WAITQ) && already_read)
iocb->ki_flags |= IOCB_NOWAIT;
if (unlikely(iocb->ki_pos >= i_size_read(inode)))
break;
error = filemap_get_pages(iocb, iter, &pvec);
if (error < 0)
break;
@@ -2733,9 +2742,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
loff_t size;
size = i_size_read(inode);
if (iocb->ki_flags & IOCB_NOWAIT) {
if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
iocb->ki_pos + count - 1))
@@ -2767,8 +2774,9 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
* the rest of the read. Buffered reads will not work for
* DAX files, so don't bother trying.
*/
if (retval < 0 || !count || iocb->ki_pos >= size ||
IS_DAX(inode))
if (retval < 0 || !count || IS_DAX(inode))
return retval;
if (iocb->ki_pos >= i_size_read(inode))
return retval;
}
@@ -3201,15 +3209,8 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
}
}
if (pmd_none(*vmf->pmd)) {
vmf->ptl = pmd_lock(mm, vmf->pmd);
if (likely(pmd_none(*vmf->pmd))) {
mm_inc_nr_ptes(mm);
pmd_populate(mm, vmf->pmd, vmf->prealloc_pte);
vmf->prealloc_pte = NULL;
}
spin_unlock(vmf->ptl);
}
if (pmd_none(*vmf->pmd))
pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);
/* See comment in handle_pte_fault() */
if (pmd_devmap_trans_unstable(vmf->pmd)) {

View File

@@ -2365,7 +2365,6 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
{
int nr_start = *nr;
struct dev_pagemap *pgmap = NULL;
int ret = 1;
do {
struct page *page = pfn_to_page(pfn);
@@ -2373,14 +2372,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
pgmap = get_dev_pagemap(pfn, pgmap);
if (unlikely(!pgmap)) {
undo_dev_pagemap(nr, nr_start, flags, pages);
ret = 0;
break;
}
SetPageReferenced(page);
pages[*nr] = page;
if (unlikely(!try_grab_page(page, flags))) {
undo_dev_pagemap(nr, nr_start, flags, pages);
ret = 0;
break;
}
(*nr)++;
@@ -2388,7 +2385,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
} while (addr += PAGE_SIZE, addr != end);
put_dev_pagemap(pgmap);
return ret;
return addr == end;
}
static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,

View File

@@ -47,6 +47,7 @@ bool __folio_end_writeback(struct folio *folio);
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
{

View File

@@ -15,18 +15,29 @@
#include "slab.h"
#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
return lru->memcg_aware;
}
static void list_lru_register(struct list_lru *lru)
{
if (!list_lru_memcg_aware(lru))
return;
mutex_lock(&list_lrus_mutex);
list_add(&lru->list, &list_lrus);
list_add(&lru->list, &memcg_list_lrus);
mutex_unlock(&list_lrus_mutex);
}
static void list_lru_unregister(struct list_lru *lru)
{
if (!list_lru_memcg_aware(lru))
return;
mutex_lock(&list_lrus_mutex);
list_del(&lru->list);
mutex_unlock(&list_lrus_mutex);
@@ -37,11 +48,6 @@ static int lru_shrinker_id(struct list_lru *lru)
return lru->shrinker_id;
}
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
return lru->memcg_aware;
}
static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
@@ -176,13 +182,16 @@ unsigned long list_lru_count_one(struct list_lru *lru,
{
struct list_lru_node *nlru = &lru->node[nid];
struct list_lru_one *l;
unsigned long count;
long count;
rcu_read_lock();
l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
count = READ_ONCE(l->nr_items);
rcu_read_unlock();
if (unlikely(count < 0))
count = 0;
return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);
@@ -354,8 +363,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
struct list_lru_memcg *memcg_lrus;
int size = memcg_nr_cache_ids;
memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
size * sizeof(void *), GFP_KERNEL);
memcg_lrus = kvmalloc(struct_size(memcg_lrus, lru, size), GFP_KERNEL);
if (!memcg_lrus)
return -ENOMEM;
@@ -389,7 +397,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
old = rcu_dereference_protected(nlru->memcg_lrus,
lockdep_is_held(&list_lrus_mutex));
new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
new = kvmalloc(struct_size(new, lru, new_size), GFP_KERNEL);
if (!new)
return -ENOMEM;
@@ -398,19 +406,8 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
return -ENOMEM;
}
memcpy(&new->lru, &old->lru, old_size * sizeof(void *));
/*
* The locking below allows readers that hold nlru->lock avoid taking
* rcu_read_lock (see list_lru_from_memcg_idx).
*
* Since list_lru_{add,del} may be called under an IRQ-safe lock,
* we have to use IRQ-safe primitives here to avoid deadlock.
*/
spin_lock_irq(&nlru->lock);
memcpy(&new->lru, &old->lru, flex_array_size(new, lru, old_size));
rcu_assign_pointer(nlru->memcg_lrus, new);
spin_unlock_irq(&nlru->lock);
kvfree_rcu(old, rcu);
return 0;
}
@@ -466,9 +463,6 @@ static int memcg_update_list_lru(struct list_lru *lru,
{
int i;
if (!list_lru_memcg_aware(lru))
return 0;
for_each_node(i) {
if (memcg_update_list_lru_node(&lru->node[i],
old_size, new_size))
@@ -491,9 +485,6 @@ static void memcg_cancel_update_list_lru(struct list_lru *lru,
{
int i;
if (!list_lru_memcg_aware(lru))
return;
for_each_node(i)
memcg_cancel_update_list_lru_node(&lru->node[i],
old_size, new_size);
@@ -506,7 +497,7 @@ int memcg_update_all_list_lrus(int new_size)
int old_size = memcg_nr_cache_ids;
mutex_lock(&list_lrus_mutex);
list_for_each_entry(lru, &list_lrus, list) {
list_for_each_entry(lru, &memcg_list_lrus, list) {
ret = memcg_update_list_lru(lru, old_size, new_size);
if (ret)
goto fail;
@@ -515,7 +506,7 @@ out:
mutex_unlock(&list_lrus_mutex);
return ret;
fail:
list_for_each_entry_continue_reverse(lru, &list_lrus, list)
list_for_each_entry_continue_reverse(lru, &memcg_list_lrus, list)
memcg_cancel_update_list_lru(lru, old_size, new_size);
goto out;
}
@@ -552,9 +543,6 @@ static void memcg_drain_list_lru(struct list_lru *lru,
{
int i;
if (!list_lru_memcg_aware(lru))
return;
for_each_node(i)
memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}
@@ -564,7 +552,7 @@ void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
struct list_lru *lru;
mutex_lock(&list_lrus_mutex);
list_for_each_entry(lru, &list_lrus, list)
list_for_each_entry(lru, &memcg_list_lrus, list)
memcg_drain_list_lru(lru, src_idx, dst_memcg);
mutex_unlock(&list_lrus_mutex);
}

View File

@@ -103,11 +103,6 @@ static bool do_memsw_account(void)
return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap;
}
/* memcg and lruvec stats flushing */
static void flush_memcg_stats_dwork(struct work_struct *w);
static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
static DEFINE_SPINLOCK(stats_flush_lock);
#define THRESHOLDS_EVENTS_TARGET 128
#define SOFTLIMIT_EVENTS_TARGET 1024
@@ -239,7 +234,7 @@ enum res_type {
iter != NULL; \
iter = mem_cgroup_iter(NULL, iter, NULL))
static inline bool should_force_charge(void)
static inline bool task_is_dying(void)
{
return tsk_is_oom_victim(current) || fatal_signal_pending(current) ||
(current->flags & PF_EXITING);
@@ -613,6 +608,58 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
return mz;
}
/*
* memcg and lruvec stats flushing
*
* Many codepaths leading to stats update or read are performance sensitive and
* adding stats flushing in such codepaths is not desirable. So, to optimize the
* flushing the kernel does:
*
* 1) Periodically and asynchronously flush the stats every 2 seconds to not let
* rstat update tree grow unbounded.
*
* 2) Flush the stats synchronously on reader side only when there are more than
* (MEMCG_CHARGE_BATCH * nr_cpus) update events. Though this optimization
* will let stats be out of sync by atmost (MEMCG_CHARGE_BATCH * nr_cpus) but
* only for 2 seconds due to (1).
*/
static void flush_memcg_stats_dwork(struct work_struct *w);
static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
static DEFINE_SPINLOCK(stats_flush_lock);
static DEFINE_PER_CPU(unsigned int, stats_updates);
static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
static inline void memcg_rstat_updated(struct mem_cgroup *memcg)
{
cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH))
atomic_inc(&stats_flush_threshold);
}
static void __mem_cgroup_flush_stats(void)
{
unsigned long flag;
if (!spin_trylock_irqsave(&stats_flush_lock, flag))
return;
cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
atomic_set(&stats_flush_threshold, 0);
spin_unlock_irqrestore(&stats_flush_lock, flag);
}
void mem_cgroup_flush_stats(void)
{
if (atomic_read(&stats_flush_threshold) > num_online_cpus())
__mem_cgroup_flush_stats();
}
static void flush_memcg_stats_dwork(struct work_struct *w)
{
mem_cgroup_flush_stats();
queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
}
/**
* __mod_memcg_state - update cgroup memory statistics
* @memcg: the memory cgroup
@@ -625,7 +672,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
return;
__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
memcg_rstat_updated(memcg);
}
/* idx can be of type enum memcg_stat_item or node_stat_item. */
@@ -653,10 +700,12 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
memcg = pn->memcg;
/* Update memcg */
__mod_memcg_state(memcg, idx, val);
__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
/* Update lruvec */
__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
memcg_rstat_updated(memcg);
}
/**
@@ -758,7 +807,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
return;
__this_cpu_add(memcg->vmstats_percpu->events[idx], count);
cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
memcg_rstat_updated(memcg);
}
static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
@@ -1415,7 +1464,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
*
* Current memory state:
*/
cgroup_rstat_flush(memcg->css.cgroup);
mem_cgroup_flush_stats();
for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
u64 size;
@@ -1576,7 +1625,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
* A few threads which were not waiting at mutex_lock_killable() can
* fail to bail out. Therefore, check again after holding oom_lock.
*/
ret = should_force_charge() || out_of_memory(&oc);
ret = task_is_dying() || out_of_memory(&oc);
unlock:
mutex_unlock(&oom_lock);
@@ -2544,6 +2593,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
struct page_counter *counter;
enum oom_status oom_status;
unsigned long nr_reclaimed;
bool passed_oom = false;
bool may_swap = true;
bool drained = false;
unsigned long pflags;
@@ -2578,15 +2628,6 @@ retry:
if (gfp_mask & __GFP_ATOMIC)
goto force;
/*
* Unlike in global OOM situations, memcg is not in a physical
* memory shortage. Allow dying and OOM-killed tasks to
* bypass the last charges so that they can exit quickly and
* free their memory.
*/
if (unlikely(should_force_charge()))
goto force;
/*
* Prevent unbounded recursion when reclaim operations need to
* allocate memory. This might exceed the limits temporarily,
@@ -2644,8 +2685,9 @@ retry:
if (gfp_mask & __GFP_RETRY_MAYFAIL)
goto nomem;
if (fatal_signal_pending(current))
goto force;
/* Avoid endless loop for tasks bypassed by the oom killer */
if (passed_oom && task_is_dying())
goto nomem;
/*
* keep retrying as long as the memcg oom killer is able to make
@@ -2654,14 +2696,10 @@ retry:
*/
oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask,
get_order(nr_pages * PAGE_SIZE));
switch (oom_status) {
case OOM_SUCCESS:
if (oom_status == OOM_SUCCESS) {
passed_oom = true;
nr_retries = MAX_RECLAIM_RETRIES;
goto retry;
case OOM_FAILED:
goto force;
default:
goto nomem;
}
nomem:
if (!(gfp_mask & __GFP_NOFAIL))
@@ -2736,8 +2774,7 @@ static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
return try_charge_memcg(memcg, gfp_mask, nr_pages);
}
#if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MMU)
static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
{
if (mem_cgroup_is_root(memcg))
return;
@@ -2746,7 +2783,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
if (do_memsw_account())
page_counter_uncharge(&memcg->memsw, nr_pages);
}
#endif
static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
{
@@ -2965,7 +3001,6 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
unsigned int nr_pages)
{
struct page_counter *counter;
struct mem_cgroup *memcg;
int ret;
@@ -2975,21 +3010,8 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
if (ret)
goto out;
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
!page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
/*
* Enforce __GFP_NOFAIL allocation because callers are not
* prepared to see failures and likely do not have any failure
* handling code.
*/
if (gfp & __GFP_NOFAIL) {
page_counter_charge(&memcg->kmem, nr_pages);
goto out;
}
cancel_charge(memcg, nr_pages);
ret = -ENOMEM;
}
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
page_counter_charge(&memcg->kmem, nr_pages);
out:
css_put(&memcg->css);
@@ -3534,8 +3556,7 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
unsigned long val;
if (mem_cgroup_is_root(memcg)) {
/* mem_cgroup_threshold() calls here from irqsafe context */
cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
mem_cgroup_flush_stats();
val = memcg_page_state(memcg, NR_FILE_PAGES) +
memcg_page_state(memcg, NR_ANON_MAPPED);
if (swap)
@@ -3610,7 +3631,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
return 0;
BUG_ON(memcg->kmemcg_id >= 0);
BUG_ON(memcg->kmem_state);
memcg_id = memcg_alloc_cache_id();
if (memcg_id < 0)
@@ -3627,22 +3647,18 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
static_branch_enable(&memcg_kmem_enabled_key);
memcg->kmemcg_id = memcg_id;
memcg->kmem_state = KMEM_ONLINE;
return 0;
}
static void memcg_offline_kmem(struct mem_cgroup *memcg)
{
struct cgroup_subsys_state *css;
struct mem_cgroup *parent, *child;
struct mem_cgroup *parent;
int kmemcg_id;
if (memcg->kmem_state != KMEM_ONLINE)
if (memcg->kmemcg_id == -1)
return;
memcg->kmem_state = KMEM_ALLOCATED;
parent = parent_mem_cgroup(memcg);
if (!parent)
parent = root_mem_cgroup;
@@ -3653,31 +3669,15 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
BUG_ON(kmemcg_id < 0);
/*
* Change kmemcg_id of this cgroup and all its descendants to the
* parent's id, and then move all entries from this cgroup's list_lrus
* to ones of the parent. After we have finished, all list_lrus
* corresponding to this cgroup are guaranteed to remain empty. The
* ordering is imposed by list_lru_node->lock taken by
* After we have finished memcg_reparent_objcgs(), all list_lrus
* corresponding to this cgroup are guaranteed to remain empty.
* The ordering is imposed by list_lru_node->lock taken by
* memcg_drain_all_list_lrus().
*/
rcu_read_lock(); /* can be called from css_free w/o cgroup_mutex */
css_for_each_descendant_pre(css, &memcg->css) {
child = mem_cgroup_from_css(css);
BUG_ON(child->kmemcg_id != kmemcg_id);
child->kmemcg_id = parent->kmemcg_id;
}
rcu_read_unlock();
memcg_drain_all_list_lrus(kmemcg_id, parent);
memcg_free_cache_id(kmemcg_id);
}
static void memcg_free_kmem(struct mem_cgroup *memcg)
{
/* css_alloc() failed, offlining didn't happen */
if (unlikely(memcg->kmem_state == KMEM_ONLINE))
memcg_offline_kmem(memcg);
memcg->kmemcg_id = -1;
}
#else
static int memcg_online_kmem(struct mem_cgroup *memcg)
@@ -3687,22 +3687,8 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
static void memcg_offline_kmem(struct mem_cgroup *memcg)
{
}
static void memcg_free_kmem(struct mem_cgroup *memcg)
{
}
#endif /* CONFIG_MEMCG_KMEM */
static int memcg_update_kmem_max(struct mem_cgroup *memcg,
unsigned long max)
{
int ret;
mutex_lock(&memcg_max_mutex);
ret = page_counter_set_max(&memcg->kmem, max);
mutex_unlock(&memcg_max_mutex);
return ret;
}
static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max)
{
int ret;
@@ -3768,10 +3754,8 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
ret = mem_cgroup_resize_max(memcg, nr_pages, true);
break;
case _KMEM:
pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
"Please report your usecase to linux-mm@kvack.org if you "
"depend on this functionality.\n");
ret = memcg_update_kmem_max(memcg, nr_pages);
/* kmem.limit_in_bytes is deprecated. */
ret = -EOPNOTSUPP;
break;
case _TCP:
ret = memcg_update_tcp_max(memcg, nr_pages);
@@ -3916,7 +3900,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
int nid;
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
cgroup_rstat_flush(memcg->css.cgroup);
mem_cgroup_flush_stats();
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
seq_printf(m, "%s=%lu", stat->name,
@@ -3988,7 +3972,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
cgroup_rstat_flush(memcg->css.cgroup);
mem_cgroup_flush_stats();
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
unsigned long nr;
@@ -4491,7 +4475,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
struct mem_cgroup *parent;
cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
mem_cgroup_flush_stats();
*pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
@@ -5324,7 +5308,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
cancel_work_sync(&memcg->high_work);
mem_cgroup_remove_from_trees(memcg);
free_shrinker_info(memcg);
memcg_free_kmem(memcg);
/* Need to offline kmem if online_css() fails */
memcg_offline_kmem(memcg);
mem_cgroup_free(memcg);
}
@@ -5357,21 +5343,6 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
memcg_wb_domain_size_changed(memcg);
}
void mem_cgroup_flush_stats(void)
{
if (!spin_trylock(&stats_flush_lock))
return;
cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
spin_unlock(&stats_flush_lock);
}
static void flush_memcg_stats_dwork(struct work_struct *w)
{
mem_cgroup_flush_stats();
queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
}
static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
@@ -5561,7 +5532,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
#endif
static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, swp_entry_t *entry)
unsigned long addr, pte_t ptent)
{
if (!vma->vm_file) /* anonymous vma */
return NULL;
@@ -5736,7 +5707,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
else if (is_swap_pte(ptent))
page = mc_handle_swap_pte(vma, ptent, &ent);
else if (pte_none(ptent))
page = mc_handle_file_pte(vma, addr, ptent, &ent);
page = mc_handle_file_pte(vma, addr, ptent);
if (!page && !ent.val)
return ret;
@@ -6391,7 +6362,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
int i;
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
cgroup_rstat_flush(memcg->css.cgroup);
mem_cgroup_flush_stats();
for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
int nid;

View File

@@ -435,35 +435,39 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
}
}
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
{
spinlock_t *ptl = pmd_lock(mm, pmd);
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
mm_inc_nr_ptes(mm);
/*
* Ensure all pte setup (eg. pte page lock and page clearing) are
* visible before the pte is made visible to other CPUs by being
* put into page tables.
*
* The other side of the story is the pointer chasing in the page
* table walking code (when walking the page table without locking;
* ie. most of the time). Fortunately, these data accesses consist
* of a chain of data-dependent loads, meaning most CPUs (alpha
* being the notable exception) will already guarantee loads are
* seen in-order. See the alpha page table accessors for the
* smp_rmb() barriers in page table walking code.
*/
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
pmd_populate(mm, pmd, *pte);
*pte = NULL;
}
spin_unlock(ptl);
}
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
{
spinlock_t *ptl;
pgtable_t new = pte_alloc_one(mm);
if (!new)
return -ENOMEM;
/*
* Ensure all pte setup (eg. pte page lock and page clearing) are
* visible before the pte is made visible to other CPUs by being
* put into page tables.
*
* The other side of the story is the pointer chasing in the page
* table walking code (when walking the page table without locking;
* ie. most of the time). Fortunately, these data accesses consist
* of a chain of data-dependent loads, meaning most CPUs (alpha
* being the notable exception) will already guarantee loads are
* seen in-order. See the alpha page table accessors for the
* smp_rmb() barriers in page table walking code.
*/
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
ptl = pmd_lock(mm, pmd);
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
mm_inc_nr_ptes(mm);
pmd_populate(mm, pmd, new);
new = NULL;
}
spin_unlock(ptl);
pmd_install(mm, pmd, &new);
if (new)
pte_free(mm, new);
return 0;
@@ -475,10 +479,9 @@ int __pte_alloc_kernel(pmd_t *pmd)
if (!new)
return -ENOMEM;
smp_wmb(); /* See comment in __pte_alloc */
spin_lock(&init_mm.page_table_lock);
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
smp_wmb(); /* See comment in pmd_install() */
pmd_populate_kernel(&init_mm, pmd, new);
new = NULL;
}
@@ -1335,16 +1338,8 @@ again:
struct page *page;
page = vm_normal_page(vma, addr, ptent);
if (unlikely(details) && page) {
/*
* unmap_shared_mapping_pages() wants to
* invalidate cache without truncating:
* unmap shared but keep private pages.
*/
if (details->check_mapping &&
details->check_mapping != page_rmapping(page))
continue;
}
if (unlikely(zap_skip_check_mapping(details, page)))
continue;
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
@@ -1377,17 +1372,8 @@ again:
is_device_exclusive_entry(entry)) {
struct page *page = pfn_swap_entry_to_page(entry);
if (unlikely(details && details->check_mapping)) {
/*
* unmap_shared_mapping_pages() wants to
* invalidate cache without truncating:
* unmap shared but keep private pages.
*/
if (details->check_mapping !=
page_rmapping(page))
continue;
}
if (unlikely(zap_skip_check_mapping(details, page)))
continue;
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
rss[mm_counter(page)]--;
@@ -2726,19 +2712,19 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
* proceeding (but do_wp_page is only called after already making such a check;
* and do_anonymous_page can safely check later on).
*/
static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
pte_t *page_table, pte_t orig_pte)
static inline int pte_unmap_same(struct vm_fault *vmf)
{
int same = 1;
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
if (sizeof(pte_t) > sizeof(unsigned long)) {
spinlock_t *ptl = pte_lockptr(mm, pmd);
spinlock_t *ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
spin_lock(ptl);
same = pte_same(*page_table, orig_pte);
same = pte_same(*vmf->pte, vmf->orig_pte);
spin_unlock(ptl);
}
#endif
pte_unmap(page_table);
pte_unmap(vmf->pte);
vmf->pte = NULL;
return same;
}
@@ -3323,20 +3309,20 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
}
static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
pgoff_t first_index,
pgoff_t last_index,
struct zap_details *details)
{
struct vm_area_struct *vma;
pgoff_t vba, vea, zba, zea;
vma_interval_tree_foreach(vma, root,
details->first_index, details->last_index) {
vma_interval_tree_foreach(vma, root, first_index, last_index) {
vba = vma->vm_pgoff;
vea = vba + vma_pages(vma) - 1;
zba = details->first_index;
zba = first_index;
if (zba < vba)
zba = vba;
zea = details->last_index;
zea = last_index;
if (zea > vea)
zea = vea;
@@ -3362,18 +3348,22 @@ void unmap_mapping_page(struct page *page)
{
struct address_space *mapping = page->mapping;
struct zap_details details = { };
pgoff_t first_index;
pgoff_t last_index;
VM_BUG_ON(!PageLocked(page));
VM_BUG_ON(PageTail(page));
details.check_mapping = mapping;
details.first_index = page->index;
details.last_index = page->index + thp_nr_pages(page) - 1;
first_index = page->index;
last_index = page->index + thp_nr_pages(page) - 1;
details.zap_mapping = mapping;
details.single_page = page;
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
unmap_mapping_range_tree(&mapping->i_mmap, first_index,
last_index, &details);
i_mmap_unlock_write(mapping);
}
@@ -3393,16 +3383,17 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
pgoff_t nr, bool even_cows)
{
struct zap_details details = { };
pgoff_t first_index = start;
pgoff_t last_index = start + nr - 1;
details.check_mapping = even_cows ? NULL : mapping;
details.first_index = start;
details.last_index = start + nr - 1;
if (details.last_index < details.first_index)
details.last_index = ULONG_MAX;
details.zap_mapping = even_cows ? NULL : mapping;
if (last_index < first_index)
last_index = ULONG_MAX;
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
unmap_mapping_range_tree(&mapping->i_mmap, first_index,
last_index, &details);
i_mmap_unlock_write(mapping);
}
EXPORT_SYMBOL_GPL(unmap_mapping_pages);
@@ -3490,7 +3481,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vm_fault_t ret = 0;
void *shadow = NULL;
if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
if (!pte_unmap_same(vmf))
goto out;
entry = pte_to_swp_entry(vmf->orig_pte);
@@ -3857,7 +3848,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
if (!vmf->prealloc_pte)
return VM_FAULT_OOM;
smp_wmb(); /* See comment in __pte_alloc() */
}
ret = vma->vm_ops->fault(vmf);
@@ -3928,7 +3918,6 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
if (!vmf->prealloc_pte)
return VM_FAULT_OOM;
smp_wmb(); /* See comment in __pte_alloc() */
}
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
@@ -4041,17 +4030,10 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
return ret;
}
if (vmf->prealloc_pte) {
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (likely(pmd_none(*vmf->pmd))) {
mm_inc_nr_ptes(vma->vm_mm);
pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
vmf->prealloc_pte = NULL;
}
spin_unlock(vmf->ptl);
} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
if (vmf->prealloc_pte)
pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
return VM_FAULT_OOM;
}
}
/* See comment in handle_pte_fault() */
@@ -4160,7 +4142,6 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
if (!vmf->prealloc_pte)
return VM_FAULT_OOM;
smp_wmb(); /* See comment in __pte_alloc() */
}
return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
@@ -4835,13 +4816,13 @@ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
if (!new)
return -ENOMEM;
smp_wmb(); /* See comment in __pte_alloc */
spin_lock(&mm->page_table_lock);
if (pgd_present(*pgd)) /* Another has populated it */
if (pgd_present(*pgd)) { /* Another has populated it */
p4d_free(mm, new);
else
} else {
smp_wmb(); /* See comment in pmd_install() */
pgd_populate(mm, pgd, new);
}
spin_unlock(&mm->page_table_lock);
return 0;
}
@@ -4858,11 +4839,10 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
if (!new)
return -ENOMEM;
smp_wmb(); /* See comment in __pte_alloc */
spin_lock(&mm->page_table_lock);
if (!p4d_present(*p4d)) {
mm_inc_nr_puds(mm);
smp_wmb(); /* See comment in pmd_install() */
p4d_populate(mm, p4d, new);
} else /* Another has populated it */
pud_free(mm, new);
@@ -4883,14 +4863,14 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
if (!new)
return -ENOMEM;
smp_wmb(); /* See comment in __pte_alloc */
ptl = pud_lock(mm, pud);
if (!pud_present(*pud)) {
mm_inc_nr_pmds(mm);
smp_wmb(); /* See comment in pmd_install() */
pud_populate(mm, pud, new);
} else /* Another has populated it */
} else { /* Another has populated it */
pmd_free(mm, new);
}
spin_unlock(ptl);
return 0;
}
@@ -5427,7 +5407,6 @@ long copy_huge_page_from_user(struct page *dst_page,
unsigned int pages_per_huge_page,
bool allow_pagefault)
{
void *src = (void *)usr_src;
void *page_kaddr;
unsigned long i, rc = 0;
unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
@@ -5440,8 +5419,7 @@ long copy_huge_page_from_user(struct page *dst_page,
else
page_kaddr = kmap_atomic(subpage);
rc = copy_from_user(page_kaddr,
(const void __user *)(src + i * PAGE_SIZE),
PAGE_SIZE);
usr_src + i * PAGE_SIZE, PAGE_SIZE);
if (allow_pagefault)
kunmap(subpage);
else

View File

@@ -3342,7 +3342,7 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
{
mm->total_vm += npages;
WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);
if (is_exec_mapping(flags))
mm->exec_vm += npages;

View File

@@ -563,7 +563,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
error = -ENOMEM;
if (!vma)
goto out;
prev = vma->vm_prev;
if (unlikely(grows & PROT_GROWSDOWN)) {
if (vma->vm_start >= end)
goto out;
@@ -581,8 +581,11 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
goto out;
}
}
if (start > vma->vm_start)
prev = vma;
else
prev = vma->vm_prev;
for (nstart = start ; ; ) {
unsigned long mask_off_old_flags;

View File

@@ -565,6 +565,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
bool *locked, unsigned long flags,
struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
{
long to_account = new_len - old_len;
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma;
unsigned long vm_flags = vma->vm_flags;
@@ -583,6 +584,9 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (mm->map_count >= sysctl_max_map_count - 3)
return -ENOMEM;
if (unlikely(flags & MREMAP_DONTUNMAP))
to_account = new_len;
if (vma->vm_ops && vma->vm_ops->may_split) {
if (vma->vm_start != old_addr)
err = vma->vm_ops->may_split(vma, old_addr);
@@ -604,8 +608,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (err)
return err;
if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT)) {
if (security_vm_enough_memory_mm(mm, new_len >> PAGE_SHIFT))
if (vm_flags & VM_ACCOUNT) {
if (security_vm_enough_memory_mm(mm, to_account >> PAGE_SHIFT))
return -ENOMEM;
}
@@ -613,8 +617,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
&need_rmap_locks);
if (!new_vma) {
if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT))
vm_unacct_memory(new_len >> PAGE_SHIFT);
if (vm_flags & VM_ACCOUNT)
vm_unacct_memory(to_account >> PAGE_SHIFT);
return -ENOMEM;
}
@@ -708,8 +712,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
}
static struct vm_area_struct *vma_to_resize(unsigned long addr,
unsigned long old_len, unsigned long new_len, unsigned long flags,
unsigned long *p)
unsigned long old_len, unsigned long new_len, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -768,13 +771,6 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
(new_len - old_len) >> PAGE_SHIFT))
return ERR_PTR(-ENOMEM);
if (vma->vm_flags & VM_ACCOUNT) {
unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
if (security_vm_enough_memory_mm(mm, charged))
return ERR_PTR(-ENOMEM);
*p = charged;
}
return vma;
}
@@ -787,7 +783,6 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long ret = -EINVAL;
unsigned long charged = 0;
unsigned long map_flags = 0;
if (offset_in_page(new_addr))
@@ -830,7 +825,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
old_len = new_len;
}
vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
vma = vma_to_resize(addr, old_len, new_len, flags);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out;
@@ -853,7 +848,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
((addr - vma->vm_start) >> PAGE_SHIFT),
map_flags);
if (IS_ERR_VALUE(ret))
goto out1;
goto out;
/* We got a new mapping */
if (!(flags & MREMAP_FIXED))
@@ -862,12 +857,6 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
uf_unmap);
if (!(offset_in_page(ret)))
goto out;
out1:
vm_unacct_memory(charged);
out:
return ret;
}
@@ -899,7 +888,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long ret = -EINVAL;
unsigned long charged = 0;
bool locked = false;
bool downgraded = false;
struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
@@ -981,7 +969,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
/*
* Ok, we need to grow..
*/
vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
vma = vma_to_resize(addr, old_len, new_len, flags);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out;
@@ -992,10 +980,18 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
if (old_len == vma->vm_end - addr) {
/* can we just expand the current mapping? */
if (vma_expandable(vma, new_len - old_len)) {
int pages = (new_len - old_len) >> PAGE_SHIFT;
long pages = (new_len - old_len) >> PAGE_SHIFT;
if (vma->vm_flags & VM_ACCOUNT) {
if (security_vm_enough_memory_mm(mm, pages)) {
ret = -ENOMEM;
goto out;
}
}
if (vma_adjust(vma, vma->vm_start, addr + new_len,
vma->vm_pgoff, NULL)) {
vm_unacct_memory(pages);
ret = -ENOMEM;
goto out;
}
@@ -1034,10 +1030,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
&locked, flags, &uf, &uf_unmap);
}
out:
if (offset_in_page(ret)) {
vm_unacct_memory(charged);
if (offset_in_page(ret))
locked = false;
}
if (downgraded)
mmap_read_unlock(current->mm);
else

View File

@@ -1120,27 +1120,24 @@ bool out_of_memory(struct oom_control *oc)
}
/*
* The pagefault handler calls here because it is out of memory, so kill a
* memory-hogging task. If oom_lock is held by somebody else, a parallel oom
* killing is already in progress so do nothing.
* The pagefault handler calls here because some allocation has failed. We have
* to take care of the memcg OOM here because this is the only safe context without
* any locks held but let the oom killer triggered from the allocation context care
* about the global OOM.
*/
void pagefault_out_of_memory(void)
{
struct oom_control oc = {
.zonelist = NULL,
.nodemask = NULL,
.memcg = NULL,
.gfp_mask = 0,
.order = 0,
};
static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
if (mem_cgroup_oom_synchronize(true))
return;
if (!mutex_trylock(&oom_lock))
if (fatal_signal_pending(current))
return;
out_of_memory(&oc);
mutex_unlock(&oom_lock);
if (__ratelimit(&pfoom_rs))
pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n");
}
SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)

View File

@@ -201,7 +201,7 @@ fail:
panic("Out of memory");
}
#else /* CONFIG_FLATMEM */
#else /* CONFIG_SPARSEMEM */
struct page_ext *lookup_page_ext(const struct page *page)
{

View File

@@ -2425,7 +2425,6 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
SetPageDirty(page);
unlock_page(page);
return 0;
out_delete_from_cache:

View File

@@ -76,7 +76,7 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start,
set_pte_at(&init_mm, addr, pte, entry);
}
/* Make pte visible before pmd. See comment in __pte_alloc(). */
/* Make pte visible before pmd. See comment in pmd_install(). */
smp_wmb();
pmd_populate_kernel(&init_mm, pmd, pgtable);

View File

@@ -135,18 +135,27 @@ EXPORT_SYMBOL(__put_page);
* put_pages_list() - release a list of pages
* @pages: list of pages threaded on page->lru
*
* Release a list of pages which are strung together on page.lru. Currently
* used by read_cache_pages() and related error recovery code.
* Release a list of pages which are strung together on page.lru.
*/
void put_pages_list(struct list_head *pages)
{
while (!list_empty(pages)) {
struct page *victim;
struct page *page, *next;
victim = lru_to_page(pages);
list_del(&victim->lru);
put_page(victim);
list_for_each_entry_safe(page, next, pages, lru) {
if (!put_page_testzero(page)) {
list_del(&page->lru);
continue;
}
if (PageHead(page)) {
list_del(&page->lru);
__put_compound_page(page);
continue;
}
/* Cannot be PageLRU because it's passed to us using the lru */
__ClearPageWaiters(page);
}
free_unref_page_list(pages);
}
EXPORT_SYMBOL(put_pages_list);

View File

@@ -2763,7 +2763,7 @@ static int swap_show(struct seq_file *swap, void *v)
struct swap_info_struct *si = v;
struct file *file;
int len;
unsigned int bytes, inuse;
unsigned long bytes, inuse;
if (si == SEQ_START_TOKEN) {
seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n");
@@ -2775,7 +2775,7 @@ static int swap_show(struct seq_file *swap, void *v)
file = si->swap_file;
len = seq_file_path(swap, file, " \t\n\\");
seq_printf(swap, "%*s%s\t%u\t%s%u\t%s%d\n",
seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n",
len < 40 ? 40 - len : 1, " ",
S_ISBLK(file_inode(file)->i_mode) ?
"partition" : "file\t",
@@ -3118,7 +3118,7 @@ static bool swap_discardable(struct swap_info_struct *si)
{
struct request_queue *q = bdev_get_queue(si->bdev);
if (!q || !blk_queue_discard(q))
if (!blk_queue_discard(q))
return false;
return true;

View File

@@ -69,10 +69,9 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
pgoff_t offset, max_off;
_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
_dst_pte = pte_mkdirty(_dst_pte);
if (page_in_cache && !vm_shared)
writable = false;
if (writable || !page_in_cache)
_dst_pte = pte_mkdirty(_dst_pte);
if (writable) {
if (wp_copy)
_dst_pte = pte_mkuffd_wp(_dst_pte);

View File

@@ -1195,18 +1195,14 @@ find_vmap_lowest_match(unsigned long size,
{
struct vmap_area *va;
struct rb_node *node;
unsigned long length;
/* Start from the root. */
node = free_vmap_area_root.rb_node;
/* Adjust the search size for alignment overhead. */
length = size + align - 1;
while (node) {
va = rb_entry(node, struct vmap_area, rb_node);
if (get_subtree_max_size(node->rb_left) >= length &&
if (get_subtree_max_size(node->rb_left) >= size &&
vstart < va->va_start) {
node = node->rb_left;
} else {
@@ -1216,9 +1212,9 @@ find_vmap_lowest_match(unsigned long size,
/*
* Does not make sense to go deeper towards the right
* sub-tree if it does not have a free block that is
* equal or bigger to the requested search length.
* equal or bigger to the requested search size.
*/
if (get_subtree_max_size(node->rb_right) >= length) {
if (get_subtree_max_size(node->rb_right) >= size) {
node = node->rb_right;
continue;
}
@@ -1226,15 +1222,23 @@ find_vmap_lowest_match(unsigned long size,
/*
* OK. We roll back and find the first right sub-tree,
* that will satisfy the search criteria. It can happen
* only once due to "vstart" restriction.
* due to "vstart" restriction or an alignment overhead
* that is bigger then PAGE_SIZE.
*/
while ((node = rb_parent(node))) {
va = rb_entry(node, struct vmap_area, rb_node);
if (is_within_this_va(va, size, align, vstart))
return va;
if (get_subtree_max_size(node->rb_right) >= length &&
if (get_subtree_max_size(node->rb_right) >= size &&
vstart <= va->va_start) {
/*
* Shift the vstart forward. Please note, we update it with
* parent's start address adding "1" because we do not want
* to enter same sub-tree after it has already been checked
* and no suitable free block found there.
*/
vstart = va->va_start + 1;
node = node->rb_right;
break;
}
@@ -1265,7 +1269,7 @@ find_vmap_lowest_linear_match(unsigned long size,
}
static void
find_vmap_lowest_match_check(unsigned long size)
find_vmap_lowest_match_check(unsigned long size, unsigned long align)
{
struct vmap_area *va_1, *va_2;
unsigned long vstart;
@@ -1274,8 +1278,8 @@ find_vmap_lowest_match_check(unsigned long size)
get_random_bytes(&rnd, sizeof(rnd));
vstart = VMALLOC_START + rnd;
va_1 = find_vmap_lowest_match(size, 1, vstart);
va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
va_1 = find_vmap_lowest_match(size, align, vstart);
va_2 = find_vmap_lowest_linear_match(size, align, vstart);
if (va_1 != va_2)
pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
@@ -1454,7 +1458,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
return vend;
#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
find_vmap_lowest_match_check(size);
find_vmap_lowest_match_check(size, align);
#endif
return nva_start_addr;
@@ -2272,15 +2276,21 @@ void __init vm_area_add_early(struct vm_struct *vm)
*/
void __init vm_area_register_early(struct vm_struct *vm, size_t align)
{
static size_t vm_init_off __initdata;
unsigned long addr;
unsigned long addr = ALIGN(VMALLOC_START, align);
struct vm_struct *cur, **p;
addr = ALIGN(VMALLOC_START + vm_init_off, align);
vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
BUG_ON(vmap_initialized);
for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) {
if ((unsigned long)cur->addr - addr >= vm->size)
break;
addr = ALIGN((unsigned long)cur->addr + cur->size, align);
}
BUG_ON(addr > VMALLOC_END - vm->size);
vm->addr = (void *)addr;
vm_area_add_early(vm);
vm->next = *p;
*p = vm;
}
static void vmap_init_free_space(void)
@@ -2743,6 +2753,13 @@ void *vmap(struct page **pages, unsigned int count,
might_sleep();
/*
* Your top guard is someone else's bottom guard. Not having a top
* guard compromises someone else's mappings too.
*/
if (WARN_ON_ONCE(flags & VM_NO_GUARD))
flags &= ~VM_NO_GUARD;
if (count > totalram_pages())
return NULL;
@@ -2860,6 +2877,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
/* High-order pages or fallback path if "bulk" fails. */
while (nr_allocated < nr_pages) {
if (fatal_signal_pending(current))
break;
if (nid == NUMA_NO_NODE)
page = alloc_pages(gfp, order);
else
@@ -2887,6 +2907,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
int node)
{
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
const gfp_t orig_gfp_mask = gfp_mask;
unsigned long addr = (unsigned long)area->addr;
unsigned long size = get_vm_area_size(area);
unsigned long array_size;
@@ -2907,7 +2928,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
}
if (!area->pages) {
warn_alloc(gfp_mask, NULL,
warn_alloc(orig_gfp_mask, NULL,
"vmalloc error: size %lu, failed to allocated page array size %lu",
nr_small_pages * PAGE_SIZE, array_size);
free_vm_area(area);
@@ -2927,7 +2948,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
* allocation request, free them via __vfree() if any.
*/
if (area->nr_pages != nr_small_pages) {
warn_alloc(gfp_mask, NULL,
warn_alloc(orig_gfp_mask, NULL,
"vmalloc error: size %lu, page order %u, failed to allocate pages",
area->nr_pages * PAGE_SIZE, page_order);
goto fail;
@@ -2935,7 +2956,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
if (vmap_pages_range(addr, addr + size, prot, area->pages,
page_shift) < 0) {
warn_alloc(gfp_mask, NULL,
warn_alloc(orig_gfp_mask, NULL,
"vmalloc error: size %lu, failed to map pages",
area->nr_pages * PAGE_SIZE);
goto fail;
@@ -3856,6 +3877,7 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
{
if (IS_ENABLED(CONFIG_NUMA)) {
unsigned int nr, *counters = m->private;
unsigned int step = 1U << vm_area_page_order(v);
if (!counters)
return;
@@ -3867,9 +3889,8 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
memset(counters, 0, nr_node_ids * sizeof(unsigned int));
for (nr = 0; nr < v->nr_pages; nr++)
counters[page_to_nid(v->pages[nr])]++;
for (nr = 0; nr < v->nr_pages; nr += step)
counters[page_to_nid(v->pages[nr])] += step;
for_each_node_state(nr, N_HIGH_MEMORY)
if (counters[nr])
seq_printf(m, " N%u=%u", nr, counters[nr]);
@@ -3905,7 +3926,7 @@ static int s_show(struct seq_file *m, void *p)
(void *)va->va_start, (void *)va->va_end,
va->va_end - va->va_start);
return 0;
goto final;
}
v = va->vm;
@@ -3946,6 +3967,7 @@ static int s_show(struct seq_file *m, void *p)
/*
* As a final step, dump "unpurged" areas.
*/
final:
if (list_is_last(&va->list, &vmap_area_list))
show_purge_info(m);