FROMLIST: mm: VMA sequence count

Wrap the VMA modifications (vma_adjust/unmap_page_range) with sequence
counts such that we can easily test if a VMA is changed.

The unmap_page_range() one allows us to make assumptions about
page-tables; when we find the seqcount hasn't changed we can assume
page-tables are still valid.

The flip side is that we cannot distinguish between a vma_adjust() and
the unmap_page_range() -- where with the former we could have
re-checked the vma bounds against the address.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

[Port to 4.12 kernel]
[Build depends on CONFIG_SPECULATIVE_PAGE_FAULT]
[Introduce vm_write_* inline function depending on
 CONFIG_SPECULATIVE_PAGE_FAULT]
[Fix lock dependency between mapping->i_mmap_rwsem and vma->vm_sequence by
 using vm_raw_write* functions]
[Fix a lock dependency warning in mmap_region() when entering the error
 path]
[Move sequence initialisation to INIT_VMA()]
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Link: https://lore.kernel.org/lkml/1523975611-15978-9-git-send-email-ldufour@linux.vnet.ibm.com/
Bug: 161210518
Change-Id: Ibc23ef3b9dbb80323c0f24cb06da34b4c3a8fa71
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
This commit is contained in:
Peter Zijlstra
2018-04-17 16:33:14 +02:00
committed by Suren Baghdasaryan
parent 0076600734
commit 2ce6b11ac3
4 changed files with 80 additions and 0 deletions

View File

@@ -628,6 +628,9 @@ struct vm_operations_struct {
/*
 * INIT_VMA - set up the fields of a VMA that need explicit initialisation.
 * @vma: the vm_area_struct to initialise.
 *
 * Initialises the anon_vma_chain list head and, on kernels built with
 * CONFIG_SPECULATIVE_PAGE_FAULT, the per-VMA vm_sequence seqcount.
 */
static inline void INIT_VMA(struct vm_area_struct *vma)
{
	INIT_LIST_HEAD(&vma->anon_vma_chain);

#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
	/* The sequence count only exists in speculative-page-fault builds. */
	seqcount_init(&vma->vm_sequence);
#endif
}
static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
@@ -1688,6 +1691,47 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address,
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write);
/*
 * Writer-side helpers for the per-VMA sequence count (vma->vm_sequence).
 *
 * Each helper forwards to the matching seqcount primitive when
 * CONFIG_SPECULATIVE_PAGE_FAULT is enabled and has an empty body
 * otherwise, so callers never need an #ifdef of their own.
 */

/* Open a write section on @vma's sequence count. */
static inline void vm_write_begin(struct vm_area_struct *vma)
{
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
	write_seqcount_begin(&vma->vm_sequence);
#endif
}

/*
 * Same as vm_write_begin(), but hands @subclass through to
 * write_seqcount_begin_nested().
 */
static inline void vm_write_begin_nested(struct vm_area_struct *vma,
					 int subclass)
{
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
	write_seqcount_begin_nested(&vma->vm_sequence, subclass);
#endif
}

/* Close a write section opened on @vma's sequence count. */
static inline void vm_write_end(struct vm_area_struct *vma)
{
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
	write_seqcount_end(&vma->vm_sequence);
#endif
}

/*
 * Open a write section using the raw_ seqcount primitive.
 * __vma_adjust() uses the raw variants to avoid a lockdep warning.
 */
static inline void vm_raw_write_begin(struct vm_area_struct *vma)
{
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
	raw_write_seqcount_begin(&vma->vm_sequence);
#endif
}

/* Close a write section opened with vm_raw_write_begin(). */
static inline void vm_raw_write_end(struct vm_area_struct *vma)
{
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
	raw_write_seqcount_end(&vma->vm_sequence);
#endif
}
extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize);
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);

View File

@@ -379,6 +379,9 @@ struct vm_area_struct {
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
seqcount_t vm_sequence;
#endif
} __randomize_layout;
struct core_thread {

View File

@@ -1444,6 +1444,7 @@ void unmap_page_range(struct mmu_gather *tlb,
unsigned long next;
BUG_ON(addr >= end);
vm_write_begin(vma);
tlb_start_vma(tlb, vma);
pgd = pgd_offset(vma->vm_mm, addr);
do {
@@ -1453,6 +1454,7 @@ void unmap_page_range(struct mmu_gather *tlb,
next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
vm_write_end(vma);
}

View File

@@ -750,6 +750,30 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
long adjust_next = 0;
int remove_next = 0;
/*
* Why use the vm_raw_write*() functions here? To avoid a lockdep warning.
*
* Lockdep is complaining about a theoretical lock dependency, involving
* 3 locks:
* mapping->i_mmap_rwsem --> vma->vm_sequence --> fs_reclaim
*
* Here are the major paths leading to this dependency:
* 1. __vma_adjust() mmap_sem -> vm_sequence -> i_mmap_rwsem
* 2. move_vma() mmap_sem -> vm_sequence -> fs_reclaim
* 3. __alloc_pages_nodemask() fs_reclaim -> i_mmap_rwsem
* 4. unmap_mapping_range() i_mmap_rwsem -> vm_sequence
*
* So there is no way to solve this easily, especially because in
* unmap_mapping_range() the i_mmap_rwsem is grabbed while the impacted
* VMAs are not yet known.
* However, the way the vm_seq is used guarantees that we will
* never block on it since we just check for its value and never wait
* for it to move, see vma_has_changed() and handle_speculative_fault().
*/
vm_raw_write_begin(vma);
if (next)
vm_raw_write_begin(next);
if (next && !insert) {
struct vm_area_struct *exporter = NULL, *importer = NULL;
@@ -957,6 +981,7 @@ again:
anon_vma_merge(vma, next);
mm->map_count--;
mpol_put(vma_policy(next));
vm_raw_write_end(next);
vm_area_free(next);
/*
* In mprotect's case 6 (see comments on vma_merge),
@@ -971,6 +996,8 @@ again:
* "vma->vm_next" gap must be updated.
*/
next = vma->vm_next;
if (next)
vm_raw_write_begin(next);
} else {
/*
* For the scope of the comment "next" and
@@ -1017,6 +1044,10 @@ again:
if (insert && file)
uprobe_mmap(insert);
if (next && next != vma)
vm_raw_write_end(next);
vm_raw_write_end(vma);
validate_mm(mm);
return 0;