From e97d091568beaaee2d1602480fe195441210f12e Mon Sep 17 00:00:00 2001 From: Mauro Ribeiro Date: Mon, 21 Apr 2014 22:02:54 -0300 Subject: [PATCH] ubuntu/aufs: major updates here. Patches from: http://sourceforge.net/p/aufs/aufs3-standalone/ci/aufs3.8/tree/fs/aufs/ If this breaks something I know who to blame --- fs/buffer.c | 2 +- fs/notify/group.c | 3 +- fs/proc/nommu.c | 4 +- fs/proc/task_mmu.c | 5 ++- fs/proc/task_nommu.c | 4 +- include/linux/fs.h | 3 +- include/linux/mm.h | 84 ++++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 2 + kernel/fork.c | 2 +- mm/filemap.c | 2 +- mm/fremap.c | 5 ++- mm/madvise.c | 4 +- mm/memory.c | 4 +- mm/mmap.c | 12 +++--- mm/msync.c | 4 +- mm/nommu.c | 10 ++--- ubuntu/aufs/hfsnotify.c | 9 ++++- 17 files changed, 130 insertions(+), 29 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 1ab9a41da50b..c3baaf020d29 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2381,7 +2381,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, * Update file times before taking page lock. We may end up failing the * fault so this update may be superfluous but who really cares... */ - file_update_time(vma->vm_file); + vma_file_update_time(vma); ret = __block_page_mkwrite(vma, vmf, get_block); sb_end_pagefault(sb); diff --git a/fs/notify/group.c b/fs/notify/group.c index 258faea98788..2ff2a0fd5aea 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -66,7 +66,7 @@ void fsnotify_get_group(struct fsnotify_group *group) { atomic_inc(&group->refcnt); } -EXPORT_SYMBOL(fsnotify_put_group); +EXPORT_SYMBOL(fsnotify_get_group); /* * Drop a reference to a group. Free it if it's through. @@ -76,6 +76,7 @@ void fsnotify_put_group(struct fsnotify_group *group) if (atomic_dec_and_test(&group->refcnt)) fsnotify_final_destroy_group(group); } +EXPORT_SYMBOL(fsnotify_put_group); /* * Create a new fsnotify_group and hold a reference for the group returned. 
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index b1822dde55c2..d8518aae6ad3 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -45,7 +45,9 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) file = region->vm_file; if (file) { - struct inode *inode = region->vm_file->f_path.dentry->d_inode; + struct inode *inode; + file = vmr_pr_or_file(region); + inode = file->f_path.dentry->d_inode; dev = inode->i_sb->s_dev; ino = inode->i_ino; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f31d90ffcd47..b328e978d75f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -271,7 +271,9 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) const char *name = NULL; if (file) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode; + file = vma_pr_or_file(vma); + inode = file->f_path.dentry->d_inode; dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; @@ -1284,6 +1286,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_printf(m, "%08lx %s", vma->vm_start, buffer); if (file) { + file = vma_pr_or_file(vma); seq_printf(m, " file="); seq_path(m, &file->f_path, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 1ccfa537f5f5..ba3c513b0d53 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -149,7 +149,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, file = vma->vm_file; if (file) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode; + file = vma_pr_or_file(vma); + inode = file->f_path.dentry->d_inode; dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; diff --git a/include/linux/fs.h b/include/linux/fs.h index 179896dd215a..726273723130 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2044,7 +2044,8 @@ 
extern struct kmem_cache *names_cachep; extern void final_putname(struct filename *name); -#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) +#define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) +#define __getname() __getname_gfp(GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) #ifndef CONFIG_AUDITSYSCALL #define putname(name) final_putname(name) diff --git a/include/linux/mm.h b/include/linux/mm.h index 9568b90ef522..dbade0f566cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include struct mempolicy; struct anon_vma; @@ -1035,6 +1038,87 @@ static inline int fixup_user_fault(struct task_struct *tsk, } #endif +/* + * Mainly for aufs which mmap(2) different file and wants to print different path + * in /proc/PID/maps. + */ +/* #define AUFS_DEBUG_MMAP */ +static inline void aufs_trace(struct file *f, struct file *pr, + const char func[], int line, const char func2[]) +{ +#ifdef AUFS_DEBUG_MMAP + if (pr) + pr_info("%s:%d: %s, %s\n", func, line, func2, + f ? (char *)f->f_dentry->d_name.name : "(null)"); +#endif +} + +static inline struct file *vmr_do_pr_or_file(struct vm_region *region, + const char func[], int line) +{ + struct file *f = region->vm_file, *pr = region->vm_prfile; + aufs_trace(f, pr, func, line, __func__); + return (f && pr) ? 
pr : f; +} + +static inline void vmr_do_fput(struct vm_region *region, + const char func[], int line) +{ + struct file *f = region->vm_file, *pr = region->vm_prfile; + aufs_trace(f, pr, func, line, __func__); + fput(f); + if (f && pr) + fput(pr); +} + +static inline void vma_do_file_update_time(struct vm_area_struct *vma, + const char func[], int line) +{ + struct file *f = vma->vm_file, *pr = vma->vm_prfile; + aufs_trace(f, pr, func, line, __func__); + file_update_time(f); + if (f && pr) + file_update_time(pr); +} + +static inline struct file *vma_do_pr_or_file(struct vm_area_struct *vma, + const char func[], int line) +{ + struct file *f = vma->vm_file, *pr = vma->vm_prfile; + aufs_trace(f, pr, func, line, __func__); + return (f && pr) ? pr : f; +} + +static inline void vma_do_get_file(struct vm_area_struct *vma, + const char func[], int line) +{ + struct file *f = vma->vm_file, *pr = vma->vm_prfile; + aufs_trace(f, pr, func, line, __func__); + get_file(f); + if (f && pr) + get_file(pr); +} + +static inline void vma_do_fput(struct vm_area_struct *vma, + const char func[], int line) +{ + struct file *f = vma->vm_file, *pr = vma->vm_prfile; + aufs_trace(f, pr, func, line, __func__); + fput(f); + if (f && pr) + fput(pr); +} + +#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \ + __LINE__) +#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__) +#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \ + __LINE__) +#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \ + __LINE__) +#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__) +#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__) + extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h 
index 939fbd3e93a2..6fd6e9cf4fde 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -212,6 +212,7 @@ struct vm_region { unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ + struct file *vm_prfile; /* the virtual backing file or NULL */ int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for @@ -280,6 +281,7 @@ struct vm_area_struct { unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE units, *not* PAGE_CACHE_SIZE */ struct file * vm_file; /* File we map to (can be NULL). */ + struct file *vm_prfile; /* shadow of vm_file */ void * vm_private_data; /* was vm_pte (shared mem) */ #ifndef CONFIG_MMU diff --git a/kernel/fork.c b/kernel/fork.c index 65b8b0a3b61f..1c9de76b5370 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -416,7 +416,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) struct inode *inode = file->f_path.dentry->d_inode; struct address_space *mapping = file->f_mapping; - get_file(file); + vma_get_file(tmp); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); mutex_lock(&mapping->i_mmap_mutex); diff --git a/mm/filemap.c b/mm/filemap.c index d1c13c279a68..cc5484ce723b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1718,7 +1718,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) int ret = VM_FAULT_LOCKED; sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + vma_file_update_time(vma); lock_page(page); if (page->mapping != inode->i_mapping) { unlock_page(page); diff --git a/mm/fremap.c b/mm/fremap.c index a0aaf0e56800..8de6c5512876 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -197,12 +197,13 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, */ if (mapping_cap_account_dirty(mapping)) { unsigned long 
addr; - struct file *file = get_file(vma->vm_file); + struct file *file = vma->vm_file; flags &= MAP_NONBLOCK; + vma_get_file(vma); addr = mmap_region(file, start, size, flags, vma->vm_flags, pgoff); - fput(file); + vma_fput(vma); if (IS_ERR_VALUE(addr)) { err = addr; } else { diff --git a/mm/madvise.c b/mm/madvise.c index 03dfa5c7adb3..7980036e166c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -234,12 +234,12 @@ static long madvise_remove(struct vm_area_struct *vma, * vma's reference to the file) can go away as soon as we drop * mmap_sem. */ - get_file(f); + vma_get_file(vma); up_read(&current->mm->mmap_sem); error = do_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); - fput(f); + vma_fput(vma); down_read(&current->mm->mmap_sem); return error; } diff --git a/mm/memory.c b/mm/memory.c index f68a69d527e2..7f602e3e328a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2718,7 +2718,7 @@ reuse: set_page_dirty_balance(dirty_page, page_mkwrite); /* file_update_time outside page_lock */ if (vma->vm_file) - file_update_time(vma->vm_file); + vma_file_update_time(vma); } put_page(dirty_page); if (page_mkwrite) { @@ -3425,7 +3425,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, /* file_update_time outside page_lock */ if (vma->vm_file && !page_mkwrite) - file_update_time(vma->vm_file); + vma_file_update_time(vma); } else { unlock_page(vmf.page); if (anon) diff --git a/mm/mmap.c b/mm/mmap.c index 44ec3e690f38..c6d443ae9716 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -241,7 +241,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); if (vma->vm_file) - fput(vma->vm_file); + vma_fput(vma); mpol_put(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); return next; @@ -815,7 +815,7 @@ again: remove_next = 1 + (end > next->vm_end); if (remove_next) { if (file) { uprobe_munmap(next, next->vm_start, next->vm_end); - fput(file); + vma_fput(vma); } if 
(next->anon_vma) anon_vma_merge(vma, next); @@ -1552,8 +1552,8 @@ out: unmap_and_free_vma: if (correct_wcount) atomic_inc(&inode->i_writecount); + vma_fput(vma); vma->vm_file = NULL; - fput(file); /* Undo any partial mapping done by a device driver. */ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); @@ -2379,7 +2379,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, goto out_free_mpol; if (new->vm_file) - get_file(new->vm_file); + vma_get_file(new); if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); @@ -2398,7 +2398,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, if (new->vm_ops && new->vm_ops->close) new->vm_ops->close(new); if (new->vm_file) - fput(new->vm_file); + vma_fput(new); unlink_anon_vmas(new); out_free_mpol: mpol_put(pol); @@ -2795,7 +2795,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (anon_vma_clone(new_vma, vma)) goto out_free_mempol; if (new_vma->vm_file) - get_file(new_vma->vm_file); + vma_get_file(new_vma); if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); diff --git a/mm/msync.c b/mm/msync.c index 632df4527c01..02d770eb9fc7 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -80,10 +80,10 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) start = vma->vm_end; if ((flags & MS_SYNC) && file && (vma->vm_flags & VM_SHARED)) { - get_file(file); + vma_get_file(vma); up_read(&mm->mmap_sem); error = vfs_fsync(file, 0); - fput(file); + vma_fput(vma); if (error || start >= end) goto out; down_read(&mm->mmap_sem); diff --git a/mm/nommu.c b/mm/nommu.c index bbe1f3fc18b1..078eae6dd964 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -647,7 +647,7 @@ static void __put_nommu_region(struct vm_region *region) up_write(&nommu_region_sem); if (region->vm_file) - fput(region->vm_file); + vmr_fput(region); /* IO memory and memory shared directly out of the pagecache * from 
ramfs/tmpfs mustn't be released here */ @@ -805,7 +805,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); if (vma->vm_file) - fput(vma->vm_file); + vma_fput(vma); put_nommu_region(vma->vm_region); kmem_cache_free(vm_area_cachep, vma); } @@ -1368,7 +1368,7 @@ unsigned long do_mmap_pgoff(struct file *file, goto error_just_free; } } - fput(region->vm_file); + vmr_fput(region); kmem_cache_free(vm_region_jar, region); region = pregion; result = start; @@ -1444,10 +1444,10 @@ error_just_free: up_write(&nommu_region_sem); error: if (region->vm_file) - fput(region->vm_file); + vmr_fput(region); kmem_cache_free(vm_region_jar, region); if (vma->vm_file) - fput(vma->vm_file); + vma_fput(vma); kmem_cache_free(vm_area_cachep, vma); kleave(" = %d", ret); return ret; diff --git a/ubuntu/aufs/hfsnotify.c b/ubuntu/aufs/hfsnotify.c index bad303f3c59a..6fab0f894883 100644 --- a/ubuntu/aufs/hfsnotify.c +++ b/ubuntu/aufs/hfsnotify.c @@ -66,13 +66,18 @@ static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn) { struct fsnotify_mark *mark; unsigned long long ull; + struct fsnotify_group *group; ull = atomic64_inc_return(&au_hfsn_ifree); BUG_ON(!ull); mark = &hn->hn_mark; - fsnotify_destroy_mark(mark); - fsnotify_put_mark(mark); + spin_lock(&mark->lock); + group = mark->group; + fsnotify_get_group(group); + spin_unlock(&mark->lock); + fsnotify_destroy_mark(mark, group); + fsnotify_put_group(group); /* free hn by myself */ return 0;