From 5b2fc72a91edd8ed5ab6daf62af71dff9106492b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Mar 2020 00:41:38 -0800 Subject: [PATCH 01/68] fscrypt: don't evict dirty inodes after removing key After FS_IOC_REMOVE_ENCRYPTION_KEY removes a key, it syncs the filesystem and tries to get and put all inodes that were unlocked by the key so that unused inodes get evicted via fscrypt_drop_inode(). Normally, the inodes are all clean due to the sync. However, after the filesystem is sync'ed, userspace can modify and close one of the files. (Userspace is *supposed* to close the files before removing the key. But it doesn't always happen, and the kernel can't assume it.) This causes the inode to be dirtied and have i_count == 0. Then, fscrypt_drop_inode() failed to consider this case and indicated that the inode can be dropped, causing the write to be lost. On f2fs, other problems such as a filesystem freeze could occur due to the inode being freed while still on f2fs's dirty inode list. Fix this bug by making fscrypt_drop_inode() only drop clean inodes. I've written an xfstest which detects this bug on ext4, f2fs, and ubifs. Fixes: b1c0ec3599f4 ("fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl") Cc: # v5.4+ Link: https://lore.kernel.org/r/20200305084138.653498-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keysetup.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index f9ab21c778e1..84039a163585 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -538,6 +538,15 @@ int fscrypt_drop_inode(struct inode *inode) return 0; mk = ci->ci_master_key->payload.data[0]; + /* + * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes + * protected by the key were cleaned by sync_filesystem(). But if + * userspace is still using the files, inodes can be dirtied between + * then and now. We mustn't lose any writes, so skip dirty inodes here. + */ + if (inode->i_state & I_DIRTY_ALL) + return 0; + /* * Note: since we aren't holding ->mk_secret_sem, the result here can * immediately become outdated. But there's no correctness problem with From 66f10f5d58662f1de10c9dd01dad5d0e22886d83 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Sat, 28 Mar 2020 00:28:08 -0700 Subject: [PATCH 02/68] mm/swapfile.c: move inode_lock out of claim_swapfile claim_swapfile() currently keeps the inode locked when it is successful, or the file is already swapfile (with -ebusy). and, on the other error cases, it does not lock the inode. this inconsistency of the lock state and return value is quite confusing and actually causing a bad unlock balance as below in the "bad_swap" section of __do_sys_swapon(). this commit fixes this issue by moving the inode_lock() and is_swapfile check out of claim_swapfile(). the inode is unlocked in "bad_swap_unlock_inode" section, so that the inode is ensured to be unlocked at "bad_swap". thus, error handling codes after the locking now jumps to "bad_swap_unlock_inode" instead of "bad_swap". ===================================== warning: bad unlock balance detected! 5.5.0-rc7+ #176 not tainted ------------------------------------- swapon/4294 is trying to release lock (&sb->s_type->i_mutex_key) at: [] __do_sys_swapon+0x94b/0x3550 but there are no more locks to release! other info that might help us debug this: no locks held by swapon/4294. stack backtrace: cpu: 5 pid: 4294 comm: swapon not tainted 5.5.0-rc7-btrfs-zns+ #176 hardware name: asus all series/h87-pro, bios 2102 07/29/2014 call trace: dump_stack+0xa1/0xea ? __do_sys_swapon+0x94b/0x3550 print_unlock_imbalance_bug.cold+0x114/0x123 ? __do_sys_swapon+0x94b/0x3550 lock_release+0x562/0xed0 ? kvfree+0x31/0x40 ? lock_downgrade+0x770/0x770 ? kvfree+0x31/0x40 ? rcu_read_lock_sched_held+0xa1/0xd0 ? rcu_read_lock_bh_held+0xb0/0xb0 up_write+0x2d/0x490 ? kfree+0x293/0x2f0 __do_sys_swapon+0x94b/0x3550 ? putname+0xb0/0xf0 ? kmem_cache_free+0x2e7/0x370 ? do_sys_open+0x184/0x3e0 ? generic_max_swapfile_size+0x40/0x40 ? do_syscall_64+0x27/0x4b0 ? entry_syscall_64_after_hwframe+0x49/0xbe ? lockdep_hardirqs_on+0x38c/0x590 __x64_sys_swapon+0x54/0x80 do_syscall_64+0xa4/0x4b0 entry_syscall_64_after_hwframe+0x49/0xbe rip: 0033:0x7f15da0a0dc7 link: http://lkml.kernel.org/r/20200206090132.154869-1-naohiro.aota@wdc.com fixes: 1638045c3677 ("mm: set s_swapfile on blockdev swap devices") signed-off-by: naohiro aota reviewed-by: andrew morton reviewed-by: darrick j. wong tested-by: qais youef cc: christoph hellwig cc: --- mm/swapfile.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 655de6babeb2..d12789904d6f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2886,10 +2886,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->bdev = inode->i_sb->s_bdev; } - inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; - return 0; } @@ -3144,36 +3140,40 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mapping = swap_file->f_mapping; inode = mapping->host; - /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */ error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap; + inode_lock(inode); + if (IS_SWAPFILE(inode)) { + error = -EBUSY; + goto bad_swap_unlock_inode; + } /* * Read the swap header. */ if (!mapping->a_ops->readpage) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } page = read_mapping_page(mapping, 0, swap_file); if (IS_ERR(page)) { error = PTR_ERR(page); - goto bad_swap; + goto bad_swap_unlock_inode; } swap_header = kmap(page); maxpages = read_swap_header(p, swap_header, inode); if (unlikely(!maxpages)) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } /* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) @@ -3198,7 +3198,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } for (ci = 0; ci < nr_cluster; ci++) @@ -3207,7 +3207,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->percpu_cluster = alloc_percpu(struct percpu_cluster); if (!p->percpu_cluster) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } for_each_possible_cpu(cpu) { struct percpu_cluster *cluster; @@ -3221,13 +3221,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = swap_cgroup_swapon(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode; nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, cluster_info, maxpages, &span); if (unlikely(nr_extents < 0)) { error = nr_extents; - goto bad_swap; + goto bad_swap_unlock_inode; } /* frontswap enabled? set up bit-per-page map for frontswap */ if (IS_ENABLED(CONFIG_FRONTSWAP)) @@ -3267,7 +3267,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = init_swap_address_space(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode; /* * Flush any pending IO and dirty mappings before we start using this @@ -3277,7 +3277,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; - goto bad_swap; + goto bad_swap_unlock_inode; } mutex_lock(&swapon_mutex); @@ -3302,6 +3302,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = 0; goto out; +bad_swap_unlock_inode: + inode_unlock(inode); bad_swap: free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; @@ -3309,6 +3311,7 @@ bad_swap: set_blocksize(p->bdev, p->old_block_size); blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } + inode = NULL; destroy_swap_extents(p); swap_cgroup_swapoff(p->type); spin_lock(&swap_lock); @@ -3320,13 +3323,8 @@ bad_swap: kvfree(frontswap_map); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); - if (swap_file) { - if (inode) { - inode_unlock(inode); - inode = NULL; - } + if (swap_file) filp_close(swap_file, NULL); - } out: if (page && !IS_ERR(page)) { kunmap(page); From fc414c6c451197f1eaa8e997a62d58aeaf947fdc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:45:12 +0800 Subject: [PATCH 03/68] f2fs: fix to wait all node page writeback There is a race condition that we may miss to wait for all node pages writeback, fix it. - fsync() - shrink - f2fs_do_sync_file - __write_node_page - set_page_writeback(page#0) : remove DIRTY/TOWRITE flag - f2fs_fsync_node_pages : won't find page #0 as TOWRITE flag was removeD - f2fs_wait_on_node_pages_writeback : wont' wait page #0 writeback as it was not in fsync_node_list list. - f2fs_add_fsync_node_entry Fixes: 50fa53eccf9f ("f2fs: fix to avoid broken of dnode block list") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9b4d197fae8e..dbc0533080c9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1562,15 +1562,16 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; - set_page_writeback(page); - ClearPageError(page); - + /* should add to global list before clearing PAGECACHE status */ if (f2fs_in_warm_node_list(sbi, page)) { seq = f2fs_add_fsync_node_entry(sbi, page); if (seq_id) *seq_id = seq; } + set_page_writeback(page); + ClearPageError(page); + fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); From 2d41291c723fc5fe51bfafc7691d8f1c2e7f82cd Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 18 Feb 2020 09:19:07 +0530 Subject: [PATCH 04/68] f2fs: fix the panic in do_checkpoint() There could be a scenario where f2fs_sync_meta_pages() will not ensure that all F2FS_DIRTY_META pages are submitted for IO. Thus, resulting in the below panic in do_checkpoint() - f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && !f2fs_cp_error(sbi)); This can happen in a low-memory condition, where shrinker could also be doing the writepage operation (stack shown below) at the same time when checkpoint is running on another core. schedule down_write f2fs_submit_page_write -> by this time, this page in page cache is tagged as PAGECACHE_TAG_WRITEBACK and PAGECACHE_TAG_DIRTY is cleared, due to which f2fs_sync_meta_pages() cannot sync this page in do_checkpoint() path. f2fs_do_write_meta_page __f2fs_write_meta_page f2fs_write_meta_page shrink_page_list shrink_inactive_list shrink_node_memcg shrink_node kswapd Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 16 +++++++--------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 44e84ac5c941..9c88fb3d255a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1250,20 +1250,20 @@ static void unblock_operations(struct f2fs_sb_info *sbi) f2fs_unlock_all(sbi); } -void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) +void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) { DEFINE_WAIT(wait); for (;;) { prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); - if (!get_pages(sbi, F2FS_WB_CP_DATA)) + if (!get_pages(sbi, type)) break; if (unlikely(f2fs_cp_error(sbi))) break; - io_schedule_timeout(5*HZ); + io_schedule_timeout(HZ/50); } finish_wait(&sbi->cp_wait, &wait); } @@ -1384,8 +1384,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && - !f2fs_cp_error(sbi)); /* * modify checkpoint @@ -1493,11 +1491,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && - !f2fs_cp_error(sbi)); + /* Wait for all dirty meta pages to be submitted for IO */ + f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META); /* wait for previous submitted meta pages writeback */ - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); /* flush all device cache */ err = f2fs_flush_device_cache(sbi); @@ -1506,7 +1504,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* barrier and flush checkpoint cp pack 2 page if it can */ commit_checkpoint(sbi, ckpt, start_blk); - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); /* * invalidate intermediate page cache borrowed from meta inode which are diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6020f2a76265..610c9567dd2b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3307,7 +3307,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); void f2fs_update_dirty_page(struct inode *inode, struct page *page); void f2fs_remove_dirty_inode(struct inode *inode); int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); -void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi); +void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type); int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); int __init f2fs_create_checkpoint_caches(void); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7f215e4f8a11..9f1fed7c7e5f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1180,7 +1180,7 @@ static void f2fs_put_super(struct super_block *sb) /* our cp_error case, we can wait for any writeback page */ f2fs_flush_merged_writes(sbi); - f2fs_wait_on_all_pages_writeback(sbi); + f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); f2fs_bug_on(sbi, sbi->fsync_node_num); From ee10cab2bf31071605c66c8ad002dc8248e8385d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 20 Feb 2020 20:50:37 -0800 Subject: [PATCH 05/68] f2fs: fix leaking uninitialized memory in compressed clusters When the compressed data of a cluster doesn't end on a page boundary, the remainder of the last page must be zeroed in order to avoid leaking uninitialized memory to disk. Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d8a64be90a50..ef7dd04312fe 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -385,11 +385,15 @@ static int f2fs_compress_pages(struct compress_ctx *cc) for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) cc->cbuf->reserved[i] = cpu_to_le32(0); + nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); + + /* zero out any unused part of the last page */ + memset(&cc->cbuf->cdata[cc->clen], 0, + (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE)); + vunmap(cc->cbuf); vunmap(cc->rbuf); - nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); - for (i = nr_cpages; i < cc->nr_cpages; i++) { f2fs_put_compressed_page(cc->cpages[i]); cc->cpages[i] = NULL; From 3fff3163db49237031e5d82c923a5366c84924c3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Feb 2020 19:20:17 +0800 Subject: [PATCH 06/68] f2fs: fix to avoid NULL pointer dereference Unable to handle kernel NULL pointer dereference at virtual address 00000000 PC is at f2fs_free_dic+0x60/0x2c8 LR is at f2fs_decompress_pages+0x3c4/0x3e8 f2fs_free_dic+0x60/0x2c8 f2fs_decompress_pages+0x3c4/0x3e8 __read_end_io+0x78/0x19c f2fs_post_read_work+0x6c/0x94 process_one_work+0x210/0x48c worker_thread+0x2e8/0x44c kthread+0x110/0x120 ret_from_fork+0x10/0x18 In f2fs_free_dic(), we can not use f2fs_put_page(,1) to release dic->tpages[i], as the page's mapping is NULL. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index ef7dd04312fe..dde8ca08e94d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1137,7 +1137,8 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) for (i = 0; i < dic->cluster_size; i++) { if (dic->rpages[i]) continue; - f2fs_put_page(dic->tpages[i], 1); + unlock_page(dic->tpages[i]); + put_page(dic->tpages[i]); } kfree(dic->tpages); } From cb54e0dd3343f10a7762a0213968f9e801f8f1b0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Feb 2020 19:20:18 +0800 Subject: [PATCH 07/68] f2fs: recycle unused compress_data.chksum feild In Struct compress_data, chksum field was never used, remove it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 1 - fs/f2fs/f2fs.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index dde8ca08e94d..5ca2ea0e1e45 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -380,7 +380,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc) } cc->cbuf->clen = cpu_to_le32(cc->clen); - cc->cbuf->chksum = cpu_to_le32(0); for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) cc->cbuf->reserved[i] = cpu_to_le32(0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 610c9567dd2b..f9094e3adaf5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1214,10 +1214,9 @@ enum compress_algorithm_type { COMPRESS_MAX, }; -#define COMPRESS_DATA_RESERVED_SIZE 4 +#define COMPRESS_DATA_RESERVED_SIZE 5 struct compress_data { __le32 clen; /* compressed data size */ - __le32 chksum; /* checksum of compressed data */ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ u8 cdata[]; /* compressed data */ }; From 5063cb34477b960c48d4c2f7844fcb035476c966 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Feb 2020 19:20:19 +0800 Subject: [PATCH 08/68] f2fs: add missing function name in kernel message Otherwise, we can not distinguish the exact location of messages, when there are more than one places printing same message. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f9094e3adaf5..a5f7a495b874 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2210,7 +2210,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, dquot_free_inode(inode); } else { if (unlikely(inode->i_blocks == 0)) { - f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + f2fs_warn(sbi, "dec_valid_node_count: inconsistent i_blocks, ino:%lu, iblocks:%llu", inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dbc0533080c9..25ea3f6229bb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1186,8 +1186,9 @@ int f2fs_remove_inode_page(struct inode *inode) } if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { - f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, (unsigned long long)inode->i_blocks); + f2fs_warn(F2FS_I_SB(inode), + "f2fs_remove_inode_page: inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); } From 3e27aebf89bc8864b08009e8986a7bb4510ac5f1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Feb 2020 19:20:16 +0800 Subject: [PATCH 09/68] f2fs: fix to avoid potential deadlock Using f2fs_trylock_op() in f2fs_write_compressed_pages() to avoid potential deadlock like we did in f2fs_write_single_data_page(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5ca2ea0e1e45..6da33ba2b2db 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -775,7 +775,6 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .encrypted_page = NULL, .compressed_page = NULL, .submitted = false, - .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, .encrypted = f2fs_encrypted_file(cc->inode), @@ -788,9 +787,10 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; - set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + if (!f2fs_trylock_op(sbi)) + return -EAGAIN; - f2fs_lock_op(sbi); + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); err = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); if (err) From 0f87f20d0536ae21524ce1c25335aec991229114 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Feb 2020 18:26:46 +0800 Subject: [PATCH 10/68] f2fs: fix to check i_compr_blocks correctly inode.i_blocks counts based on 512byte sector, we need to convert to 4kb sized block count before comparing to i_compr_blocks. In addition, add to print message when sanity check on inode compression configs failed. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 78c3f1d70f1d..a7c8a7347ff7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -291,13 +291,30 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) fi->i_flags & F2FS_COMPR_FL && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_log_cluster_size)) { - if (ri->i_compress_algorithm >= COMPRESS_MAX) + if (ri->i_compress_algorithm >= COMPRESS_MAX) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported " + "compress algorithm: %u, run fsck to fix", + __func__, inode->i_ino, + ri->i_compress_algorithm); return false; - if (le64_to_cpu(ri->i_compr_blocks) > inode->i_blocks) + } + if (le64_to_cpu(ri->i_compr_blocks) > + SECTOR_TO_BLOCK(inode->i_blocks)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent " + "i_compr_blocks:%llu, i_blocks:%lu, run fsck to fix", + __func__, inode->i_ino, + le64_to_cpu(ri->i_compr_blocks), + SECTOR_TO_BLOCK(inode->i_blocks)); return false; + } if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || - ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) + ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported " + "log cluster size: %u, run fsck to fix", + __func__, inode->i_ino, + ri->i_log_cluster_size); return false; + } } return true; From 584ae9ec0fd81e307aca70b00c65407540640eeb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Feb 2020 19:30:03 +0800 Subject: [PATCH 11/68] f2fs: cover last_disk_size update with spinlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change solves below hangtask issue: INFO: task kworker/u16:1:58 blocked for more than 122 seconds. Not tainted 5.6.0-rc2-00590-g9983bdae4974e #11 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u16:1 D 0 58 2 0x00000000 Workqueue: writeback wb_workfn (flush-179:0) Backtrace: (__schedule) from [] (schedule+0x78/0xf4) (schedule) from [] (rwsem_down_write_slowpath+0x24c/0x4c0) (rwsem_down_write_slowpath) from [] (down_write+0x6c/0x70) (down_write) from [] (f2fs_write_single_data_page+0x608/0x7ac) (f2fs_write_single_data_page) from [] (f2fs_write_cache_pages+0x2b4/0x7c4) (f2fs_write_cache_pages) from [] (f2fs_write_data_pages+0x344/0x35c) (f2fs_write_data_pages) from [] (do_writepages+0x3c/0xd4) (do_writepages) from [] (__writeback_single_inode+0x44/0x454) (__writeback_single_inode) from [] (writeback_sb_inodes+0x204/0x4b0) (writeback_sb_inodes) from [] (__writeback_inodes_wb+0x50/0xe4) (__writeback_inodes_wb) from [] (wb_writeback+0x294/0x338) (wb_writeback) from [] (wb_workfn+0x35c/0x54c) (wb_workfn) from [] (process_one_work+0x214/0x544) (process_one_work) from [] (worker_thread+0x4c/0x574) (worker_thread) from [] (kthread+0x144/0x170) (kthread) from [] (ret_from_fork+0x14/0x2c) Reported-and-tested-by: Ondřej Jirman Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 4 ++-- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 5 +++-- fs/f2fs/file.c | 4 ++-- fs/f2fs/super.c | 1 + 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 6da33ba2b2db..25bc6154c31c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -898,10 +898,10 @@ unlock_continue: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - down_write(&fi->i_sem); + spin_lock(&fi->i_size_lock); if (fi->last_disk_size < psize) fi->last_disk_size = psize; - up_write(&fi->i_sem); + spin_unlock(&fi->i_size_lock); f2fs_put_rpages(cc); f2fs_destroy_compress_ctx(cc); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b706f408fcac..be4efce64987 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2647,10 +2647,10 @@ write: if (err) { file_set_keep_isize(inode); } else { - down_write(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; - up_write(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); } done: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a5f7a495b874..63ab26a81cb6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -696,6 +696,7 @@ struct f2fs_inode_info { struct task_struct *cp_task; /* separate cp/wb IO stats*/ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ + spinlock_t i_size_lock; /* protect last_disk_size */ #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; @@ -2856,9 +2857,9 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) if (!f2fs_is_time_consistent(inode)) return false; - down_read(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); ret = F2FS_I(inode)->last_disk_size == i_size_read(inode); - up_read(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); return ret; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 054b63ee67d7..f51f4e9700ee 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -932,10 +932,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - down_write(&F2FS_I(inode)->i_sem); + spin_lock(&F2FS_I(inode)->i_size_lock); inode->i_mtime = inode->i_ctime = current_time(inode); F2FS_I(inode)->last_disk_size = i_size_read(inode); - up_write(&F2FS_I(inode)->i_sem); + spin_unlock(&F2FS_I(inode)->i_size_lock); } __setattr_copy(inode, attr); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9f1fed7c7e5f..9a31a6fd34f8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -960,6 +960,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); init_rwsem(&fi->i_sem); + spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_ilist); From 9f900c267f7ed6f39f31916ed68fa711b889d4b8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Feb 2020 19:30:04 +0800 Subject: [PATCH 12/68] f2fs: remove i_sem lock coverage in f2fs_setxattr() f2fs_inode.xattr_ver field was gone after commit d260081ccf37 ("f2fs: change recovery policy of xattr node block"), remove i_sem lock coverage in f2fs_setxattr() Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 296b3189448a..6794a13064be 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -758,12 +758,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - /* protect xattr_ver */ - down_write(&F2FS_I(inode)->i_sem); down_write(&F2FS_I(inode)->i_xattr_sem); err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); up_write(&F2FS_I(inode)->i_xattr_sem); - up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); f2fs_update_time(sbi, REQ_TIME); From 82304e39e61b5e11129efc52397a9e9b12dd44dc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Feb 2020 19:30:05 +0800 Subject: [PATCH 13/68] f2fs: fix inconsistent comments Lack of maintenance on comments may mislead developers, fix them. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 18 ++++-------------- fs/f2fs/data.c | 19 ++++++------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 1 - fs/f2fs/gc.c | 5 ++++- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 2 +- fs/f2fs/node.c | 6 +----- fs/f2fs/shrinker.c | 2 +- fs/f2fs/super.c | 4 ++-- 10 files changed, 21 insertions(+), 40 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9c88fb3d255a..0dea31f1ddad 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -50,9 +50,6 @@ repeat: return page; } -/* - * We guarantee no failure on the returned page. - */ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, bool is_meta) { @@ -206,7 +203,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, } /* - * Readahead CP/NAT/SIT/SSA pages + * Readahead CP/NAT/SIT/SSA/POR pages */ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync) @@ -898,7 +895,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) return -ENOMEM; /* * Finding out valid cp block involves read both - * sets( cp pack1 and cp pack 2) + * sets( cp pack 1 and cp pack 2) */ cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr); cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); @@ -1385,10 +1382,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - /* - * modify checkpoint - * version number is already updated - */ + /* start to update checkpoint, cp ver is already updated previously */ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { @@ -1541,9 +1535,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0; } -/* - * We guarantee that this checkpoint procedure will not fail. - */ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1611,7 +1602,6 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_sit_entries(sbi, cpc); - /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); if (err) f2fs_release_discard_addrs(sbi); @@ -1624,7 +1614,7 @@ stop: if (cpc->reason & CP_RECOVERY) f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); - /* do checkpoint periodically */ + /* update CP_TIME to trigger checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index be4efce64987..8d97a774b42a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -364,9 +364,6 @@ static void f2fs_write_end_io(struct bio *bio) bio_put(bio); } -/* - * Return true, if pre_bio's bdev is same as its target device. - */ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio) { @@ -403,6 +400,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) return 0; } +/* + * Return true, if pre_bio's bdev is same as its target device. + */ static bool __same_bdev(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio) { @@ -410,9 +410,6 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno; } -/* - * Low-level block read/write IO operations. - */ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) { struct f2fs_sb_info *sbi = fio->sbi; @@ -1388,13 +1385,9 @@ void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) } /* - * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with - * f2fs_map_blocks structure. - * If original data blocks are allocated, then give them to blockdev. - * Otherwise, - * a. preallocate requested block addresses - * b. do not use extent cache for better performance - * c. give the block addresses to blockdev + * f2fs_map_blocks() tries to find or build mapping relationship which + * maps continuous logical blocks to physical blocks, and return such + * info via f2fs_map_blocks structure. */ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 63ab26a81cb6..a27cad9cd61b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2389,9 +2389,9 @@ static inline block_t datablock_addr(struct inode *inode, raw_node = F2FS_NODE(node_page); - /* from GC path only */ if (is_inode) { if (!inode) + /* from GC path only */ base = offset_in_addr(&raw_node->i); else if (f2fs_has_extra_attr(inode)) base = get_extra_isize(inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f51f4e9700ee..d9de259333de 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -112,7 +112,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) } } - /* fill the page */ f2fs_wait_on_page_writeback(page, DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index db8725d473b5..608d04950f05 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -192,7 +192,10 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = sbi->segs_per_sec; } - /* we need to check every dirty segments in the FG_GC case */ + /* + * adjust candidates range, should select all dirty segments for + * foreground GC and urgent GC cases. + */ if (gc_type != FG_GC && (sbi->gc_mode != GC_URGENT) && p->max_search > sbi->max_victim_search) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a7c8a7347ff7..34b9e1fe1625 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -776,7 +776,7 @@ no_delete: else f2fs_inode_synced(inode); - /* ino == 0, if f2fs_new_inode() was failed t*/ + /* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */ if (inode->i_ino) invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2aa035422c0f..b75c70813f9e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -177,7 +177,7 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub) } /* - * Set multimedia files as cold files for hot/cold data separation + * Set file's temperature for hot/cold data separation */ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 25ea3f6229bb..7da31760247b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -510,9 +510,6 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) return nr - nr_shrink; } -/* - * This function always returns success - */ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) { @@ -714,8 +711,7 @@ got: /* * Caller should call f2fs_put_dnode(dn). * Also, it should grab and release a rwsem by calling f2fs_lock_op() and - * f2fs_unlock_op() only if ro is not set RDONLY_NODE. - * In the case of RDONLY_NODE, we don't need to care about mutex. + * f2fs_unlock_op() only if mode is set with ALLOC_NODE. */ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index a467aca29cfe..d66de5999a26 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -58,7 +58,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, /* count extent cache entries */ count += __count_extent_cache(sbi); - /* shrink clean nat cache entries */ + /* count clean nat cache entries */ count += __count_nat_entries(sbi); /* count free nids cache entries */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9a31a6fd34f8..004647c686e0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1666,7 +1666,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) out_unlock: up_write(&sbi->gc_lock); restore_flag: - sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ + sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; } @@ -3619,7 +3619,7 @@ try_onemore: f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } #endif - /* if there are nt orphan nodes free them */ + /* if there are any orphan inodes, free them */ err = f2fs_recover_orphan_inodes(sbi); if (err) goto free_meta; From 55a5997c038e531e6f5d52bdd4b0a2e19e0da21b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 2 Mar 2020 17:34:27 +0800 Subject: [PATCH 14/68] f2fs: fix to avoid using uninitialized variable In f2fs_vm_page_mkwrite(), if inode is compress one, and current mmapped page locates in compressed cluster, we have to call f2fs_get_dnode_of_data() to get its physical block address before f2fs_wait_on_block_writeback(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d9de259333de..a8483553383a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -106,10 +106,18 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) err = f2fs_get_block(&dn, page->index); f2fs_put_dnode(&dn); __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); - if (err) { - unlock_page(page); - goto out_sem; - } + } + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (!need_alloc) { + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + f2fs_put_dnode(&dn); + } +#endif + if (err) { + unlock_page(page); + goto out_sem; } f2fs_wait_on_page_writeback(page, DATA, false, true); From 64f6384ef2c32c7895607bc4806b1d7ef9085534 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 28 Feb 2020 18:08:46 +0800 Subject: [PATCH 15/68] f2fs: fix to avoid use-after-free in f2fs_write_multi_pages() In compress cluster, if physical block number is less than logic page number, race condition will cause use-after-free issue as described below: - f2fs_write_compressed_pages - fio.page = cic->rpages[0]; - f2fs_outplace_write_data - f2fs_compress_write_end_io - kfree(cic->rpages); - kfree(cic); - fio.page = cic->rpages[1]; f2fs_write_multi_pages+0xfd0/0x1a98 f2fs_write_data_pages+0x74c/0xb5c do_writepages+0x64/0x108 __writeback_single_inode+0xdc/0x4b8 writeback_sb_inodes+0x4d0/0xa68 __writeback_inodes_wb+0x88/0x178 wb_writeback+0x1f0/0x424 wb_workfn+0x2f4/0x574 process_one_work+0x210/0x48c worker_thread+0x2e8/0x44c kthread+0x110/0x120 ret_from_fork+0x10/0x18 Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 25bc6154c31c..411d1a5679ac 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -848,7 +848,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - fio.page = cic->rpages[i]; + fio.page = cc->rpages[i]; fio.old_blkaddr = blkaddr; /* cluster header */ From 71abd2acf1eee1c8d1877aeaa80404156d8a2cb4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 5 Mar 2020 15:20:26 -0800 Subject: [PATCH 16/68] f2fs: fix wrong check on F2FS_IOC_FSSETXATTR This fixes the incorrect failure when enabling project quota on casefold-enabled file. Cc: Daniel Rosenberg Cc: kernel-team@android.com Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a8483553383a..d4a67bf3173a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1797,12 +1797,15 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { struct f2fs_inode_info *fi = F2FS_I(inode); + u32 masked_flags = fi->i_flags & mask; + + f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask)); /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) { + if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) return -EOPNOTSUPP; if (!f2fs_empty_dir(inode)) @@ -1816,9 +1819,9 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) return -EINVAL; } - if ((iflags ^ fi->i_flags) & F2FS_COMPR_FL) { + if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { if (S_ISREG(inode->i_mode) && - (fi->i_flags & F2FS_COMPR_FL || i_size_read(inode) || + (masked_flags & F2FS_COMPR_FL || i_size_read(inode) || F2FS_HAS_BLOCKS(inode))) return -EINVAL; if (iflags & F2FS_NOCOMP_FL) @@ -1835,8 +1838,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) set_compress_context(inode); } } - if ((iflags ^ fi->i_flags) & F2FS_NOCOMP_FL) { - if (fi->i_flags & F2FS_COMPR_FL) + if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) { + if (masked_flags & F2FS_COMPR_FL) return -EINVAL; } From 34303c6d0233641e84a28d81e91b90aea3bcd389 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2020 20:50:07 +0800 Subject: [PATCH 17/68] f2fs: fix to account compressed inode correctly stat_inc_compr_inode() needs to check FI_COMPRESSED_FILE flag, so in f2fs_disable_compressed_file(), we should call stat_dec_compr_inode() before clearing FI_COMPRESSED_FILE flag. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a27cad9cd61b..0b3640484426 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3823,8 +3823,8 @@ static inline u64 f2fs_disable_compressed_file(struct inode *inode) return fi->i_compr_blocks; fi->i_flags &= ~F2FS_COMPR_FL; - clear_inode_flag(inode, FI_COMPRESSED_FILE); stat_dec_compr_inode(inode); + clear_inode_flag(inode, FI_COMPRESSED_FILE); return 0; } From 3da4dc032f6d951088bf4505503a0a7c64859d6c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2020 20:50:08 +0800 Subject: [PATCH 18/68] f2fs: fix to check dirty pages during compressed inode conversion Compressed cluster can be generated during dirty data writeback, if there is dirty pages on compressed inode, it needs to disable converting compressed inode to non-compressed one. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0b3640484426..d8da797c03b9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3819,6 +3819,8 @@ static inline u64 f2fs_disable_compressed_file(struct inode *inode) if (!f2fs_compressed_file(inode)) return 0; + if (get_dirty_pages(inode)) + return 1; if (fi->i_compr_blocks) return fi->i_compr_blocks; From 8aa412921f6df5c4db792eeb18bda8ec362c13cf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2020 20:50:09 +0800 Subject: [PATCH 19/68] f2fs: allow to clear F2FS_COMPR_FL flag If regular inode has no compressed cluster, allow using 'chattr -c' to remove its compress flag, recovering it to a non-compressed file. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d4a67bf3173a..6dc106223408 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1820,10 +1820,10 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) } if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { - if (S_ISREG(inode->i_mode) && - (masked_flags & F2FS_COMPR_FL || i_size_read(inode) || - F2FS_HAS_BLOCKS(inode))) - return -EINVAL; + if (S_ISREG(inode->i_mode) && (masked_flags & F2FS_COMPR_FL)) { + if (f2fs_disable_compressed_file(inode)) + return -EINVAL; + } if (iflags & F2FS_NOCOMP_FL) return -EINVAL; if (iflags & F2FS_COMPR_FL) { From b48463133784f8512e2713ca4ae41780926b6ad8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Mar 2020 10:33:53 +0100 Subject: [PATCH 20/68] f2fs: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Signed-off-by: Takashi Iwai Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 60395cb26d6f..9174a7e8669b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -109,47 +109,47 @@ static ssize_t features_show(struct f2fs_attr *a, return sprintf(buf, "0\n"); if (f2fs_sb_has_encrypt(sbi)) - len += snprintf(buf, PAGE_SIZE - len, "%s", + len += scnprintf(buf, PAGE_SIZE - len, "%s", "encryption"); if (f2fs_sb_has_blkzoned(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "extra_attr"); if (f2fs_sb_has_project_quota(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "projquota"); if (f2fs_sb_has_inode_chksum(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); if (f2fs_sb_has_flexible_inline_xattr(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); if (f2fs_sb_has_quota_ino(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "quota_ino"); if (f2fs_sb_has_inode_crtime(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); if (f2fs_sb_has_lost_found(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); if (f2fs_sb_has_verity(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "sb_checksum"); if (f2fs_sb_has_casefold(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "casefold"); if (f2fs_sb_has_compression(sbi)) - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "compression"); - len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "pin_file"); - len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -233,16 +233,16 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, int hot_count = sbi->raw_super->hot_ext_count; int len = 0, i; - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) - len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", extlist[i]); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) - len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", extlist[i]); return len; } From eb8bca0262a5b925cb196cfd5c79c0562885e9b5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Feb 2020 19:08:16 -0800 Subject: [PATCH 21/68] f2fs: show mounted time Let's show mounted time. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 5 +++++ fs/f2fs/debug.c | 3 +++ fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 2 +- fs/f2fs/sysfs.c | 8 ++++++++ 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 1a6cd5397129..bd8a0d19abe6 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -318,3 +318,8 @@ Date: September 2019 Contact: "Hridya Valsaraju" Description: Average number of valid blocks. Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//mounted_time_sec +Date: February 2020 +Contact: "Jaegeuk Kim" +Description: Show the mounted time in secs of this partition. diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 6b89eae5e4ca..0dbcb0f9c019 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -301,6 +301,9 @@ static int stat_show(struct seq_file *s, void *v) si->ssa_area_segs, si->main_area_segs); seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", si->overp_segs, si->rsvd_segs); + seq_printf(s, "Current Time Sec: %llu / Mounted Time Sec: %llu\n\n", + ktime_get_boottime_seconds(), + SIT_I(si->sbi)->mounted_time); if (test_opt(si->sbi, DISCARD)) seq_printf(s, "Utilization: %u%% (%u valid blocks, %u discard blocks)\n", si->utilization, si->valid_count, si->discard_blks); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dea5e265c8cc..43990d4eca5f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4070,7 +4070,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); - sit_i->mounted_time = ktime_get_real_seconds(); + sit_i->mounted_time = ktime_get_boottime_seconds(); init_rwsem(&sit_i->sentry_lock); return 0; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 459dc3901a57..7a83bd530812 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -756,7 +756,7 @@ static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi, bool base_time) { struct sit_info *sit_i = SIT_I(sbi); - time64_t diff, now = ktime_get_real_seconds(); + time64_t diff, now = ktime_get_boottime_seconds(); if (now >= sit_i->mounted_time) return sit_i->elapsed_time + now - sit_i->mounted_time; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 9174a7e8669b..145e1d95b13b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -185,6 +185,12 @@ static ssize_t encoding_show(struct f2fs_attr *a, return sprintf(buf, "(none)"); } +static ssize_t mounted_time_sec_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sprintf(buf, "%llu", SIT_I(sbi)->mounted_time); +} + #ifdef CONFIG_F2FS_STAT_FS static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) @@ -544,6 +550,7 @@ F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); +F2FS_GENERAL_RO_ATTR(mounted_time_sec); #ifdef CONFIG_F2FS_STAT_FS F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); @@ -621,6 +628,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(reserved_blocks), ATTR_LIST(current_reserved_blocks), ATTR_LIST(encoding), + ATTR_LIST(mounted_time_sec), #ifdef CONFIG_F2FS_STAT_FS ATTR_LIST(cp_foreground_calls), ATTR_LIST(cp_background_calls), From 4e2f73316722c8c80ce40ceeedf33e6a36b74d11 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:10 +0800 Subject: [PATCH 22/68] f2fs: clean up codes with {f2fs_,}data_blkaddr() - rename datablock_addr() to data_blkaddr(). - wrap data_blkaddr() with f2fs_data_blkaddr() to clean up parameters. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 7 +++---- fs/f2fs/data.c | 12 +++++------- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/file.c | 15 +++++---------- fs/f2fs/gc.c | 2 +- fs/f2fs/node.c | 3 +-- fs/f2fs/recovery.c | 7 +++---- 7 files changed, 24 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 411d1a5679ac..5ef7d7cfc0e7 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -557,7 +557,7 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, + blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i); if (blkaddr != NULL_ADDR) ret++; @@ -797,7 +797,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, goto out_unlock_op; for (i = 0; i < cc->cluster_size; i++) { - if (datablock_addr(dn.inode, dn.node_page, + if (data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i) == NULL_ADDR) goto out_put_dnode; } @@ -846,8 +846,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->cluster_size; i++, dn.ofs_in_node++) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, dn.node_page, - dn.ofs_in_node); + blkaddr = f2fs_data_blkaddr(&dn); fio.page = cc->rpages[i]; fio.old_blkaddr = blkaddr; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8d97a774b42a..33e65e975646 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1044,8 +1044,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); for (; count > 0; dn->ofs_in_node++) { - block_t blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + block_t blkaddr = f2fs_data_blkaddr(dn); if (blkaddr == NULL_ADDR) { dn->data_blkaddr = NEW_ADDR; __set_data_blkaddr(dn); @@ -1297,8 +1296,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) if (err) return err; - dn->data_blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); if (dn->data_blkaddr != NULL_ADDR) goto alloc; @@ -1460,7 +1458,7 @@ next_dnode: end_offset = ADDRS_PER_PAGE(dn.node_page, inode); next_block: - blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); + blkaddr = f2fs_data_blkaddr(&dn); if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { @@ -2060,7 +2058,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, dn.node_page, + blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) @@ -2089,7 +2087,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, struct page *page = dic->cpages[i]; block_t blkaddr; - blkaddr = datablock_addr(dn.inode, dn.node_page, + blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1); if (bio && !page_is_mergeable(sbi, bio, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d8da797c03b9..88a6dd9d3441 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2379,7 +2379,7 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) } static inline int f2fs_has_extra_attr(struct inode *inode); -static inline block_t datablock_addr(struct inode *inode, +static inline block_t data_blkaddr(struct inode *inode, struct page *node_page, unsigned int offset) { struct f2fs_node *raw_node; @@ -2401,6 +2401,11 @@ static inline block_t datablock_addr(struct inode *inode, return le32_to_cpu(addr_array[base + offset]); } +static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) +{ + return data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node); +} + static inline int f2fs_test_bit(unsigned int nr, char *addr) { int mask; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6dc106223408..606433a46418 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -455,8 +455,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) data_ofs = (loff_t)pgofs << PAGE_SHIFT) { block_t blkaddr; - blkaddr = datablock_addr(dn.inode, - dn.node_page, dn.ofs_in_node); + blkaddr = f2fs_data_blkaddr(&dn); if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), @@ -1119,8 +1118,7 @@ next_dnode: done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { - *blkaddr = datablock_addr(dn.inode, - dn.node_page, dn.ofs_in_node); + *blkaddr = f2fs_data_blkaddr(&dn); if (__is_valid_data_blkaddr(*blkaddr) && !f2fs_is_valid_blkaddr(sbi, *blkaddr, @@ -1209,8 +1207,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, ADDRS_PER_PAGE(dn.node_page, dst_inode) - dn.ofs_in_node, len - i); do { - dn.data_blkaddr = datablock_addr(dn.inode, - dn.node_page, dn.ofs_in_node); + dn.data_blkaddr = f2fs_data_blkaddr(&dn); f2fs_truncate_data_blocks_range(&dn, 1); if (do_replace[i]) { @@ -1386,8 +1383,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, int ret; for (; index < end; index++, dn->ofs_in_node++) { - if (datablock_addr(dn->inode, dn->node_page, - dn->ofs_in_node) == NULL_ADDR) + if (f2fs_data_blkaddr(dn) == NULL_ADDR) count++; } @@ -1398,8 +1394,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, dn->ofs_in_node = ofs_in_node; for (index = start; index < end; index++, dn->ofs_in_node++) { - dn->data_blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); /* * f2fs_reserve_new_blocks will not guarantee entire block * allocation. diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 608d04950f05..3fdc3d6a874c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -637,7 +637,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } *nofs = ofs_of_node(node_page); - source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node); + source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); if (source_blkaddr != blkaddr) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7da31760247b..77904e0a3d54 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -803,8 +803,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) dn->nid = nids[level]; dn->ofs_in_node = offset[level]; dn->node_page = npage[level]; - dn->data_blkaddr = datablock_addr(dn->inode, - dn->node_page, dn->ofs_in_node); + dn->data_blkaddr = f2fs_data_blkaddr(dn); return 0; release_pages: diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 76477f71d4ee..abe6d65a7a55 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -496,8 +496,7 @@ out: return 0; truncate_out: - if (datablock_addr(tdn.inode, tdn.node_page, - tdn.ofs_in_node) == blkaddr) + if (f2fs_data_blkaddr(&tdn) == blkaddr) f2fs_truncate_data_blocks_range(&tdn, 1); if (dn->inode->i_ino == nid && !dn->inode_page_locked) unlock_page(dn->inode_page); @@ -560,8 +559,8 @@ retry_dn: for (; start < end; start++, dn.ofs_in_node++) { block_t src, dest; - src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); - dest = datablock_addr(dn.inode, page, dn.ofs_in_node); + src = f2fs_data_blkaddr(&dn); + dest = data_blkaddr(dn.inode, page, dn.ofs_in_node); if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { From 9a4089d4e98e7932745d1ad74ae6b5ed23d5fe50 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:11 +0800 Subject: [PATCH 23/68] f2fs: clean up parameter of macro XATTR_SIZE() Just cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 10 ++++------ fs/f2fs/xattr.h | 3 ++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 6794a13064be..d4800df6302d 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -312,12 +312,12 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (!xnid && !inline_size) return -ENODATA; - *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE; + *base_size = XATTR_SIZE(inode) + XATTR_PADDING_SIZE; txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); if (!txattr_addr) return -ENOMEM; - last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode); + last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(inode); /* read from inline xattr */ if (inline_size) { @@ -539,7 +539,6 @@ out: ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { struct inode *inode = d_inode(dentry); - nid_t xnid = F2FS_I(inode)->i_xattr_nid; struct f2fs_xattr_entry *entry; void *base_addr, *last_base_addr; int error = 0; @@ -551,7 +550,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (error) return error; - last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + last_base_addr = (void *)base_addr + XATTR_SIZE(inode); list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = @@ -609,7 +608,6 @@ static int __f2fs_setxattr(struct inode *inode, int index, { struct f2fs_xattr_entry *here, *last; void *base_addr, *last_base_addr; - nid_t xnid = F2FS_I(inode)->i_xattr_nid; int found, newsize; size_t len; __u32 new_hsize; @@ -633,7 +631,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (error) return error; - last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + last_base_addr = (void *)base_addr + XATTR_SIZE(inode); /* find entry with wanted name. */ here = __find_xattr(base_addr, last_base_addr, index, len, name); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index de0c600b9cab..574beea46494 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -73,7 +73,8 @@ struct f2fs_xattr_entry { entry = XATTR_NEXT_ENTRY(entry)) #define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) #define XATTR_PADDING_SIZE (sizeof(__u32)) -#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \ +#define XATTR_SIZE(i) ((F2FS_I(i)->i_xattr_nid ? \ + VALID_XATTR_BLOCK_SIZE : 0) + \ (inline_xattr_size(i))) #define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ VALID_XATTR_BLOCK_SIZE) From 22ccbf9620d879a46cd5bdcf4c3104a8b1733f34 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:45:11 +0800 Subject: [PATCH 24/68] f2fs: fix to show norecovery mount option Previously, 'norecovery' mount option will be shown as 'disable_roll_forward', fix to show original option name correctly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 88a6dd9d3441..d369529c79c1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -100,6 +100,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 #define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 +#define F2FS_MOUNT_NORECOVERY 0x04000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 004647c686e0..59e38353207a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -446,7 +446,7 @@ static int parse_options(struct super_block *sb, char *options) break; case Opt_norecovery: /* this option mounts f2fs with ro */ - set_opt(sbi, DISABLE_ROLL_FORWARD); + set_opt(sbi, NORECOVERY); if (!f2fs_readonly(sb)) return -EINVAL; break; @@ -1455,6 +1455,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) } if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); + if (test_opt(sbi, NORECOVERY)) + seq_puts(seq, ",norecovery"); if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); else @@ -3628,7 +3630,8 @@ try_onemore: goto reset_checkpoint; /* recover fsynced data */ - if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + if (!test_opt(sbi, DISABLE_ROLL_FORWARD) && + !test_opt(sbi, NORECOVERY)) { /* * mount should be failed, when device has readonly mode, and * previous checkpoint was not done by clean system shutdown. From 84a4563fdcc10a6ba4699acc770905d28dc2f6e2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:12 +0800 Subject: [PATCH 25/68] f2fs: clean up lfs/adaptive mount option This patch removes F2FS_MOUNT_ADAPTIVE and F2FS_MOUNT_LFS mount options, and add F2FS_OPTION.fs_mode with below two status to indicate filesystem mode. enum { FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ FS_MODE_LFS, /* use lfs allocation only */ }; It can enhance code readability and fs mode's scalability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- fs/f2fs/f2fs.h | 27 ++++++++++----------------- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 12 ++++++------ fs/f2fs/super.c | 16 ++++++++-------- 6 files changed, 30 insertions(+), 37 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 33e65e975646..1ca84aaf949e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -442,7 +442,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (type != DATA && type != NODE) goto submit_io; - if (test_opt(sbi, LFS) && current->plug) + if (f2fs_lfs_mode(sbi) && current->plug) blk_finish_plug(current->plug); if (F2FS_IO_ALIGNED(sbi)) @@ -1413,7 +1413,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { - if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) goto next_dnode; @@ -1468,7 +1468,7 @@ next_block: if (__is_valid_data_blkaddr(blkaddr)) { /* use out-place-update for driect IO under LFS mode */ - if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { err = __allocate_data_block(&dn, map->m_seg_type); if (err) @@ -2388,7 +2388,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (test_opt(sbi, LFS)) + if (f2fs_lfs_mode(sbi)) return true; if (S_ISDIR(inode->i_mode)) return true; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d369529c79c1..5cb275332be2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -91,8 +91,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 -#define F2FS_MOUNT_ADAPTIVE 0x00020000 -#define F2FS_MOUNT_LFS 0x00040000 #define F2FS_MOUNT_USRQUOTA 0x00080000 #define F2FS_MOUNT_GRPQUOTA 0x00100000 #define F2FS_MOUNT_PRJQUOTA 0x00200000 @@ -139,6 +137,7 @@ struct f2fs_mount_info { int whint_mode; int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ + int fs_mode; /* fs mode: LFS or ADAPTIVE */ bool test_dummy_encryption; /* test dummy encryption */ block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint @@ -1173,6 +1172,11 @@ enum { GC_URGENT, }; +enum { + FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ + FS_MODE_LFS, /* use lfs allocation only */ +}; + enum { WHINT_MODE_OFF, /* not pass down write hints */ WHINT_MODE_USER, /* try to pass down hints given by users */ @@ -3909,20 +3913,9 @@ static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi) return false; } - -static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) +static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) { - clear_opt(sbi, ADAPTIVE); - clear_opt(sbi, LFS); - - switch (mt) { - case F2FS_MOUNT_ADAPTIVE: - set_opt(sbi, ADAPTIVE); - break; - case F2FS_MOUNT_LFS: - set_opt(sbi, LFS); - break; - } + return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } static inline bool f2fs_may_encrypt(struct inode *inode) @@ -3977,7 +3970,7 @@ static inline int allow_outplace_dio(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int rw = iov_iter_rw(iter); - return (test_opt(sbi, LFS) && (rw == WRITE) && + return (f2fs_lfs_mode(sbi) && (rw == WRITE) && !block_unaligned_IO(inode, iocb, iter)); } @@ -3999,7 +3992,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, */ if (f2fs_sb_has_blkzoned(sbi)) return true; - if (test_opt(sbi, LFS) && (rw == WRITE)) { + if (f2fs_lfs_mode(sbi) && (rw == WRITE)) { if (block_unaligned_IO(inode, iocb, iter)) return true; if (F2FS_IO_ALIGNED(sbi)) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 606433a46418..6be6efd02a6b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1129,7 +1129,7 @@ next_dnode: if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { - if (test_opt(sbi, LFS)) { + if (f2fs_lfs_mode(sbi)) { f2fs_put_dnode(&dn); return -EOPNOTSUPP; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3fdc3d6a874c..d883943f7f46 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -765,7 +765,7 @@ static int move_data_block(struct inode *inode, block_t bidx, struct page *page, *mpage; block_t newaddr; int err = 0; - bool lfs_mode = test_opt(fio.sbi, LFS); + bool lfs_mode = f2fs_lfs_mode(fio.sbi); /* do not read out */ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 43990d4eca5f..cbd55a96c3f0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -172,7 +172,7 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); - if (test_opt(sbi, LFS)) + if (f2fs_lfs_mode(sbi)) return false; if (sbi->gc_mode == GC_URGENT) return true; @@ -1939,7 +1939,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); - bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); + bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); mutex_lock(&dirty_i->seglist_lock); @@ -1971,7 +1971,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, (end - 1) <= cpc->trim_end) continue; - if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) { + if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -2829,7 +2829,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; - bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); + bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -3192,7 +3192,7 @@ static void update_device_state(struct f2fs_io_info *fio) static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio); - bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA); + bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); if (keep_order) down_read(&fio->sbi->io_order_lock); @@ -4420,7 +4420,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; - if (!test_opt(sbi, LFS)) + if (!f2fs_lfs_mode(sbi)) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 59e38353207a..029d44bb6633 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -600,10 +600,10 @@ static int parse_options(struct super_block *sb, char *options) kvfree(name); return -EINVAL; } - set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; } else if (strlen(name) == 3 && !strncmp(name, "lfs", 3)) { - set_opt_mode(sbi, F2FS_MOUNT_LFS); + F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; } else { kvfree(name); return -EINVAL; @@ -904,7 +904,7 @@ static int parse_options(struct super_block *sb, char *options) } #endif - if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { + if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) { f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", F2FS_IO_SIZE_KB(sbi)); return -EINVAL; @@ -934,7 +934,7 @@ static int parse_options(struct super_block *sb, char *options) } } - if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { + if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) { f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n"); return -EINVAL; } @@ -1508,9 +1508,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",data_flush"); seq_puts(seq, ",mode="); - if (test_opt(sbi, ADAPTIVE)) + if (F2FS_OPTION(sbi).fs_mode == FS_MODE_ADAPTIVE) seq_puts(seq, "adaptive"); - else if (test_opt(sbi, LFS)) + else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS) seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) @@ -1597,9 +1597,9 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi)) - set_opt_mode(sbi, F2FS_MOUNT_LFS); + F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; else - set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); From 69bd1a226b4ad61766c5a2a2d5a8235eeb94a8ba Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Feb 2020 17:44:13 +0800 Subject: [PATCH 26/68] f2fs: clean up bggc mount option There are three status for background gc: on, off and sync, it's a little bit confused to use test_opt(BG_GC) and test_opt(FORCE_FG_GC) combinations to indicate status of background gc. So let's remove F2FS_MOUNT_BG_GC and F2FS_MOUNT_FORCE_FG_GC mount options, and add F2FS_OPTION().bggc_mode with below three status to clean up codes and enhance bggc mode's scalability. enum { BGGC_MODE_ON, /* background gc is on */ BGGC_MODE_OFF, /* background gc is off */ BGGC_MODE_SYNC, /* * background gc is on, migrating blocks * like foreground gc */ }; Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++++++-- fs/f2fs/gc.c | 6 +++++- fs/f2fs/super.c | 29 +++++++++++++---------------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5cb275332be2..e744a6e92872 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -74,7 +74,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; /* * For mount options */ -#define F2FS_MOUNT_BG_GC 0x00000001 #define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 #define F2FS_MOUNT_DISCARD 0x00000004 #define F2FS_MOUNT_NOHEAP 0x00000008 @@ -88,7 +87,6 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_NOBARRIER 0x00000800 #define F2FS_MOUNT_FASTBOOT 0x00001000 #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 -#define F2FS_MOUNT_FORCE_FG_GC 0x00004000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 #define F2FS_MOUNT_USRQUOTA 0x00080000 @@ -138,6 +136,7 @@ struct f2fs_mount_info { int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ int fs_mode; /* fs mode: LFS or ADAPTIVE */ + int bggc_mode; /* bggc mode: off, on or sync */ bool test_dummy_encryption; /* test dummy encryption */ block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint @@ -1172,6 +1171,15 @@ enum { GC_URGENT, }; +enum { + BGGC_MODE_ON, /* background gc is on */ + BGGC_MODE_OFF, /* background gc is off */ + BGGC_MODE_SYNC, /* + * background gc is on, migrating blocks + * like foreground gc + */ +}; + enum { FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ FS_MODE_LFS, /* use lfs allocation only */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d883943f7f46..a19966b0e018 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -31,6 +31,8 @@ static int gc_thread_func(void *data) set_freezable(); do { + bool sync_mode; + wait_event_interruptible_timeout(*wq, kthread_should_stop() || freezing(current) || gc_th->gc_wake, @@ -101,8 +103,10 @@ static int gc_thread_func(void *data) do_gc: stat_inc_bggc_count(sbi->stat_info); + sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) + if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; trace_f2fs_background_gc(sbi->sb, wait_ms, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 029d44bb6633..8c152b4dd783 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -427,14 +427,11 @@ static int parse_options(struct super_block *sb, char *options) if (!name) return -ENOMEM; if (strlen(name) == 2 && !strncmp(name, "on", 2)) { - set_opt(sbi, BG_GC); - clear_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) { - clear_opt(sbi, BG_GC); - clear_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) { - set_opt(sbi, BG_GC); - set_opt(sbi, FORCE_FG_GC); + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; } else { kvfree(name); return -EINVAL; @@ -1445,14 +1442,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) { - if (test_opt(sbi, FORCE_FG_GC)) - seq_printf(seq, ",background_gc=%s", "sync"); - else - seq_printf(seq, ",background_gc=%s", "on"); - } else { + if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) + seq_printf(seq, ",background_gc=%s", "sync"); + else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_ON) + seq_printf(seq, ",background_gc=%s", "on"); + else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) seq_printf(seq, ",background_gc=%s", "off"); - } + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, NORECOVERY)) @@ -1584,8 +1580,8 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; F2FS_OPTION(sbi).compress_ext_cnt = 0; + F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; - set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); @@ -1791,7 +1787,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if background_gc = off is passed in mount * option. Also sync the filesystem. */ - if ((*flags & SB_RDONLY) || !test_opt(sbi, BG_GC)) { + if ((*flags & SB_RDONLY) || + F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) { if (sbi->gc_thread) { f2fs_stop_gc_thread(sbi); need_restart_gc = true; @@ -3686,7 +3683,7 @@ reset_checkpoint: * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { + if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) From f66cb558628e5f43b6e6cdab4f3d83ad90bf3d4c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 9 Feb 2020 13:27:09 -0800 Subject: [PATCH 27/68] f2fs: add migration count iff migration happens If first segment is empty and migration_granularity is 1, we can't move this at all. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a19966b0e018..bb0b6435dcf7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1240,12 +1240,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, segno, gc_type); stat_inc_seg_count(sbi, type, gc_type); + migrated++; freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; - migrated++; if (__is_large_section(sbi) && segno + 1 < end_segno) sbi->next_victim_seg[gc_type] = segno + 1; From b5fe8c8f613b31f7184baf803da479c1ccd2c149 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 9 Feb 2020 13:23:28 -0800 Subject: [PATCH 28/68] f2fs: skip GC when section is full This fixes skipping GC when segment is full in large section. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bb0b6435dcf7..0dfdec4926e4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1025,8 +1025,8 @@ next_step: * race condition along with SSR block allocation. */ if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || - get_valid_blocks(sbi, segno, false) == - sbi->blocks_per_seg) + get_valid_blocks(sbi, segno, true) == + BLKS_PER_SEC(sbi)) return submitted; if (check_valid_map(sbi, segno, off) == 0) From 43b1774309e527734e24861ae4c1f39d23747388 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 17 Feb 2020 17:45:44 +0800 Subject: [PATCH 29/68] f2fs: introduce DEFAULT_IO_TIMEOUT As Geert Uytterhoeven reported: for parameter HZ/50 in congestion_wait(BLK_RW_ASYNC, HZ/50); On some platforms, HZ can be less than 50, then unexpected 0 timeout jiffies will be set in congestion_wait(). This patch introduces a macro DEFAULT_IO_TIMEOUT to wrap a determinate value with msecs_to_jiffies(20) to instead HZ/50 to avoid such issue. Quoted from Geert Uytterhoeven: "A timeout of HZ means 1 second. HZ/50 means 20 ms, but has the risk of being zero, if HZ < 50. If you want to use a timeout of 20 ms, you best use msecs_to_jiffies(20), as that takes care of the special cases, and never returns 0." Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/compress.c | 3 ++- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/gc.c | 3 ++- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/recovery.c | 5 +++-- fs/f2fs/segment.c | 10 ++++++---- fs/f2fs/super.c | 6 ++++-- 10 files changed, 25 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0dea31f1ddad..46fc9c1542fe 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1260,7 +1260,7 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) if (unlikely(f2fs_cp_error(sbi))) break; - io_schedule_timeout(HZ/50); + io_schedule_timeout(DEFAULT_IO_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait); } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5ef7d7cfc0e7..748933813cd1 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -988,7 +988,8 @@ retry_write: } else if (ret == -EAGAIN) { ret = 0; cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); lock_page(cc->rpages[i]); clear_page_dirty_for_io(cc->rpages[i]); goto retry_write; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1ca84aaf949e..58a3273ba7f8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2317,7 +2317,7 @@ retry_encrypt: /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { f2fs_flush_merged_writes(fio->sbi); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; } @@ -2908,7 +2908,7 @@ result: if (wbc->sync_mode == WB_SYNC_ALL) { cond_resched(); congestion_wait(BLK_RW_ASYNC, - HZ/50); + DEFAULT_IO_TIMEOUT); goto retry_write; } goto next; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e744a6e92872..d35e45728863 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -558,6 +558,9 @@ enum { #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ +/* congestion wait timeout value, default: 20ms */ +#define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20)) + /* maximum retry quota flush count */ #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 0dfdec4926e4..f6958ae8c157 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -977,7 +977,8 @@ retry: if (err) { clear_cold_data(page); if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry; } if (is_dirty) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 34b9e1fe1625..2c3078f4381b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -535,7 +535,7 @@ retry: inode = f2fs_iget(sb, ino); if (IS_ERR(inode)) { if (PTR_ERR(inode) == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry; } } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 77904e0a3d54..bf06b8ff47e9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2599,7 +2599,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) retry: ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); if (!ipage) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index abe6d65a7a55..71a44d8bd959 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -534,7 +534,7 @@ retry_dn: err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto retry_dn; } goto out; @@ -617,7 +617,8 @@ retry_prev: err = check_index_in_prev_nodes(sbi, dest, &dn); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry_prev; } goto err; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cbd55a96c3f0..f0cf0383fd2e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -245,7 +245,8 @@ retry: LOOKUP_NODE); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); cond_resched(); goto retry; } @@ -312,7 +313,7 @@ next: skip: iput(inode); } - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); cond_resched(); if (gc_failure) { if (++looped >= count) @@ -415,7 +416,8 @@ retry: err = f2fs_do_write_data_page(&fio); if (err) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); cond_resched(); goto retry; } @@ -2800,7 +2802,7 @@ next: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); trimmed += __wait_all_discard_cmd(sbi, NULL); - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); goto next; } skip: diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c152b4dd783..61213989d329 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1893,7 +1893,8 @@ repeat: page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); if (IS_ERR(page)) { if (PTR_ERR(page) == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto repeat; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); @@ -1947,7 +1948,8 @@ retry: &page, NULL); if (unlikely(err)) { if (err == -ENOMEM) { - congestion_wait(BLK_RW_ASYNC, HZ/50); + congestion_wait(BLK_RW_ASYNC, + DEFAULT_IO_TIMEOUT); goto retry; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); From da03b357285254d0d5eb46ccf3674c7265ca125b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 17 Feb 2020 17:46:20 +0800 Subject: [PATCH 30/68] f2fs: add prefix for f2fs slab cache name In order to avoid polluting global slab cache namespace. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/node.c | 8 ++++---- fs/f2fs/segment.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 58a3273ba7f8..3166ebcd3bd3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3846,7 +3846,7 @@ void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) int __init f2fs_init_bio_entry_cache(void) { - bio_entry_slab = f2fs_kmem_cache_create("bio_entry_slab", + bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab", sizeof(struct bio_entry)); if (!bio_entry_slab) return -ENOMEM; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bf06b8ff47e9..6ad8f6f8ad89 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -3190,22 +3190,22 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) int __init f2fs_create_node_manager_caches(void) { - nat_entry_slab = f2fs_kmem_cache_create("nat_entry", + nat_entry_slab = f2fs_kmem_cache_create("f2fs_nat_entry", sizeof(struct nat_entry)); if (!nat_entry_slab) goto fail; - free_nid_slab = f2fs_kmem_cache_create("free_nid", + free_nid_slab = f2fs_kmem_cache_create("f2fs_free_nid", sizeof(struct free_nid)); if (!free_nid_slab) goto destroy_nat_entry; - nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", + nat_entry_set_slab = f2fs_kmem_cache_create("f2fs_nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) goto destroy_free_nid; - fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry", + fsync_node_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_node_entry", sizeof(struct fsync_node_entry)); if (!fsync_node_entry_slab) goto destroy_nat_entry_set; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f0cf0383fd2e..776636faba67 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4574,22 +4574,22 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) int __init f2fs_create_segment_manager_caches(void) { - discard_entry_slab = f2fs_kmem_cache_create("discard_entry", + discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry", sizeof(struct discard_entry)); if (!discard_entry_slab) goto fail; - discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd", + discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd", sizeof(struct discard_cmd)); if (!discard_cmd_slab) goto destroy_discard_entry; - sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", + sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destroy_discard_cmd; - inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", + inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry", sizeof(struct inmem_pages)); if (!inmem_entry_slab) goto destroy_sit_entry_set; From 276d74a444cba16d51909b209a7c78cad33c83b7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Feb 2020 18:21:35 +0800 Subject: [PATCH 31/68] f2fs: fix to avoid triggering IO in write path If we are in write IO path, we need to avoid using GFP_KERNEL. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/data.c | 24 +++++++++++++----------- fs/f2fs/f2fs.h | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 748933813cd1..bd3ea01db448 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -656,7 +656,7 @@ retry: struct bio *bio = NULL; ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, - &last_block_in_bio, false); + &last_block_in_bio, false, true); f2fs_destroy_compress_ctx(cc); if (ret) goto release_pages; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3166ebcd3bd3..b22cedd4c48b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -925,14 +925,15 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, unsigned nr_pages, unsigned op_flag, - pgoff_t first_idx) + pgoff_t first_idx, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; struct bio_post_read_ctx *ctx; unsigned int post_read_steps = 0; - bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); + bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), + for_write); if (!bio) return ERR_PTR(-ENOMEM); f2fs_target_device(sbi, blkaddr, bio); @@ -967,12 +968,12 @@ static void f2fs_release_read_bio(struct bio *bio) /* This can handle encryption stuffs */ static int f2fs_submit_page_read(struct inode *inode, struct page *page, - block_t blkaddr) + block_t blkaddr, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; - bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index); + bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -1158,7 +1159,7 @@ got_it: return page; } - err = f2fs_submit_page_read(inode, page, dn.data_blkaddr); + err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, for_write); if (err) goto put_err; return page; @@ -1971,7 +1972,8 @@ submit_and_realloc: } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, page->index); + is_readahead ? REQ_RAHEAD : 0, page->index, + false); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2006,7 +2008,7 @@ out: #ifdef CONFIG_F2FS_FS_COMPRESSION int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead) + bool is_readahead, bool for_write) { struct dnode_of_data dn; struct inode *inode = cc->inode; @@ -2100,7 +2102,7 @@ submit_and_realloc: if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, is_readahead ? REQ_RAHEAD : 0, - page->index); + page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2201,7 +2203,7 @@ int f2fs_mpage_readpages(struct address_space *mapping, ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead); + is_readahead, false); f2fs_destroy_compress_ctx(&cc); if (ret) goto set_error_page; @@ -2244,7 +2246,7 @@ next_page: ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead); + is_readahead, false); f2fs_destroy_compress_ctx(&cc); } } @@ -3274,7 +3276,7 @@ repeat: err = -EFSCORRUPTED; goto fail; } - err = f2fs_submit_page_read(inode, page, blkaddr); + err = f2fs_submit_page_read(inode, page, blkaddr, true); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d35e45728863..e1aa9aec2935 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3795,7 +3795,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead); + bool is_readahead, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_free_dic(struct decompress_io_ctx *dic); void f2fs_decompress_end_io(struct page **rpages, From 3b4f64cba2d320ca09cbab4d33cd1ec4a0597f2d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 21 Feb 2020 18:09:21 +0800 Subject: [PATCH 32/68] f2fs: introduce F2FS_IOC_GET_COMPRESS_BLOCKS With this newly introduced interface, user can get block number compression saved in target inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e1aa9aec2935..a9032c76cb95 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -426,6 +426,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) #define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) +#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) #define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL #define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6be6efd02a6b..ad9277051e82 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3401,6 +3401,21 @@ out: return err; } +static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u64 blocks; + + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + blocks = F2FS_I(inode)->i_compr_blocks; + return put_user(blocks, (u64 __user *)arg); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3479,6 +3494,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_get_volume_name(filp, arg); case F2FS_IOC_SET_VOLUME_NAME: return f2fs_set_volume_name(filp, arg); + case F2FS_IOC_GET_COMPRESS_BLOCKS: + return f2fs_get_compress_blocks(filp, arg); default: return -ENOTTY; } @@ -3635,6 +3652,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_MEASURE_VERITY: case F2FS_IOC_GET_VOLUME_NAME: case F2FS_IOC_SET_VOLUME_NAME: + case F2FS_IOC_GET_COMPRESS_BLOCKS: break; default: return -ENOIOCTLCMD; From 3ab0ac2637e81450283717ecde63b68b04862437 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Feb 2020 18:21:34 +0800 Subject: [PATCH 33/68] f2fs: avoid __GFP_NOFAIL in f2fs_bio_alloc __f2fs_bio_alloc() won't fail due to memory pool backend, remove unneeded __GFP_NOFAIL flag in __f2fs_bio_alloc(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++-------- fs/f2fs/f2fs.h | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b22cedd4c48b..98fbc85df094 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -54,17 +54,13 @@ static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask, return bio_alloc_bioset(gfp_mask, nr_iovecs, &f2fs_bioset); } -struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail) +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio) { - struct bio *bio; - - if (no_fail) { + if (noio) { /* No failure on bio allocation */ - bio = __f2fs_bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = __f2fs_bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; + return __f2fs_bio_alloc(GFP_NOIO, npages); } + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); return NULL; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a9032c76cb95..e19a8910b6f2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3340,7 +3340,7 @@ void f2fs_destroy_checkpoint_caches(void); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); -struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail); +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_bio(struct f2fs_sb_info *sbi, From d5550eb4c82d110722bacfedbade2f100fc76dff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 24 Feb 2020 19:20:15 +0800 Subject: [PATCH 34/68] f2fs: fix to show tracepoint correctly Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ad9277051e82..93180850558d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3523,8 +3523,10 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } - if (!f2fs_is_compress_backend_ready(inode)) - return -EOPNOTSUPP; + if (!f2fs_is_compress_backend_ready(inode)) { + ret = -EOPNOTSUPP; + goto out; + } if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) { From fccbe1109668aab728028db1f855f09a7d7074f7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 9 Feb 2020 13:28:45 -0800 Subject: [PATCH 35/68] f2fs: skip migration only when BG_GC is called FG_GC needs to move entire section more quickly. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f6958ae8c157..9ead93fcf78a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1211,7 +1211,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, if (get_valid_blocks(sbi, segno, false) == 0) goto freed; - if (__is_large_section(sbi) && + if (gc_type == BG_GC && __is_large_section(sbi) && migrated >= sbi->migration_granularity) goto skip; if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) From bccbdb31c839fd3f7fed3f51ed89bdf875b4625d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Feb 2020 18:17:10 +0800 Subject: [PATCH 36/68] f2fs: use kmem_cache pool during inline xattr lookups It's been observed that kzalloc() on lookup_all_xattrs() are called millions of times on Android, quickly becoming the top abuser of slub memory allocator. Use a dedicated kmem cache pool for xattr lookups to mitigate this. Signed-off-by: Park Ju Hyung Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/super.c | 10 ++++++++- fs/f2fs/xattr.c | 54 ++++++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/xattr.h | 4 ++++ 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e19a8910b6f2..c5f9c08b9ee8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1487,6 +1487,9 @@ struct f2fs_sb_info { __u32 s_chksum_seed; struct workqueue_struct *post_read_wq; /* post read workqueue */ + + struct kmem_cache *inline_xattr_slab; /* inline xattr entry */ + unsigned int inline_xattr_slab_size; /* default inline xattr slab size */ }; struct f2fs_private_dio { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 61213989d329..110668b71c22 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1210,6 +1210,7 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->raw_super); destroy_device_list(sbi); + f2fs_destroy_xattr_caches(sbi); mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -3487,12 +3488,17 @@ try_onemore: } } + /* init per sbi slab cache */ + err = f2fs_init_xattr_caches(sbi); + if (err) + goto free_io_dummy; + /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { f2fs_err(sbi, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_io_dummy; + goto free_xattr_cache; } err = f2fs_get_valid_checkpoint(sbi); @@ -3754,6 +3760,8 @@ free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); sbi->meta_inode = NULL; +free_xattr_cache: + f2fs_destroy_xattr_caches(sbi); free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_percpu: diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index d4800df6302d..4f6582ef7ee3 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -23,6 +23,25 @@ #include "xattr.h" #include "segment.h" +static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline) +{ + if (likely(size == sbi->inline_xattr_slab_size)) { + *is_inline = true; + return kmem_cache_zalloc(sbi->inline_xattr_slab, GFP_NOFS); + } + *is_inline = false; + return f2fs_kzalloc(sbi, size, GFP_NOFS); +} + +static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr, + bool is_inline) +{ + if (is_inline) + kmem_cache_free(sbi->inline_xattr_slab, xattr_addr); + else + kvfree(xattr_addr); +} + static int f2fs_xattr_generic_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) @@ -301,7 +320,8 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, - void **base_addr, int *base_size) + void **base_addr, int *base_size, + bool *is_inline) { void *cur_addr, *txattr_addr, *last_txattr_addr; void *last_addr = NULL; @@ -313,7 +333,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, return -ENODATA; *base_size = XATTR_SIZE(inode) + XATTR_PADDING_SIZE; - txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); + txattr_addr = xattr_alloc(F2FS_I_SB(inode), *base_size, is_inline); if (!txattr_addr) return -ENOMEM; @@ -362,7 +382,7 @@ check: *base_addr = txattr_addr; return 0; out: - kvfree(txattr_addr); + xattr_free(F2FS_I_SB(inode), txattr_addr, *is_inline); return err; } @@ -499,6 +519,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, unsigned int size, len; void *base_addr = NULL; int base_size; + bool is_inline; if (name == NULL) return -EINVAL; @@ -509,7 +530,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, - &entry, &base_addr, &base_size); + &entry, &base_addr, &base_size, &is_inline); up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -532,7 +553,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, } error = size; out: - kvfree(base_addr); + xattr_free(F2FS_I_SB(inode), base_addr, is_inline); return error; } @@ -764,3 +785,26 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_update_time(sbi, REQ_TIME); return err; } + +int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) +{ + dev_t dev = sbi->sb->s_bdev->bd_dev; + char slab_name[32]; + + sprintf(slab_name, "f2fs_xattr_entry-%u:%u", MAJOR(dev), MINOR(dev)); + + sbi->inline_xattr_slab_size = F2FS_OPTION(sbi).inline_xattr_size * + sizeof(__le32) + XATTR_PADDING_SIZE; + + sbi->inline_xattr_slab = f2fs_kmem_cache_create(slab_name, + sbi->inline_xattr_slab_size); + if (!sbi->inline_xattr_slab) + return -ENOMEM; + + return 0; +} + +void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) +{ + kmem_cache_destroy(sbi->inline_xattr_slab); +} diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 574beea46494..0153b4c9ef21 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -131,6 +131,8 @@ extern int f2fs_setxattr(struct inode *, int, const char *, extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); +extern int f2fs_init_xattr_caches(struct f2fs_sb_info *); +extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); #else #define f2fs_xattr_handlers NULL @@ -151,6 +153,8 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, { return -EOPNOTSUPP; } +static int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } +static void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } #endif #ifdef CONFIG_F2FS_FS_SECURITY From 80636b5dad4a88b4b109cd79e7e9655e3255955f Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 3 Mar 2020 19:59:25 +0530 Subject: [PATCH 37/68] f2fs: Fix mount failure due to SPO after a successful online resize FS Even though online resize is successfully done, a SPO immediately after resize, still causes below error in the next mount. [ 11.294650] F2FS-fs (sda8): Wrong user_block_count: 2233856 [ 11.300272] F2FS-fs (sda8): Failed to get valid F2FS checkpoint This is because after FS metadata is updated in update_fs_metadata() if the SBI_IS_DIRTY is not dirty, then CP will not be done to reflect the new user_block_count. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9ead93fcf78a..bc203e24a039 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1578,11 +1578,17 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) goto out; } + mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, -secs); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + set_sbi_flag(sbi, SBI_IS_DIRTY); + mutex_unlock(&sbi->cp_mutex); + err = f2fs_sync_fs(sbi->sb, 1); if (err) { + mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, secs); + mutex_unlock(&sbi->cp_mutex); update_sb_metadata(sbi, secs); f2fs_commit_super(sbi, false); } From 31a63f7f4939ddb9f459091906217e65196b49b8 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 3 Mar 2020 19:59:26 +0530 Subject: [PATCH 38/68] f2fs: Add a new CP flag to help fsck fix resize SPO issues Add and set a new CP flag CP_RESIZEFS_FLAG during online resize FS to help fsck fix the metadata mismatch that may happen due to SPO during resize, where SB got updated but CP data couldn't be written yet. fsck errors - Info: CKPT version = 6ed7bccb Wrong user_block_count(2233856) [f2fs_do_mount:3365] Checkpoint is polluted Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++++-- include/linux/f2fs_fs.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 46fc9c1542fe..852890b72d6a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1298,10 +1298,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || - is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) + __set_ckpt_flags(ckpt, CP_RESIZEFS_FLAG); + else + __clear_ckpt_flags(ckpt, CP_RESIZEFS_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) __set_ckpt_flags(ckpt, CP_DISABLED_FLAG); else diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ac3f4888b3df..3c383ddd92dd 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -125,6 +125,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_RESIZEFS_FLAG 0x00004000 #define CP_DISABLED_QUICK_FLAG 0x00002000 #define CP_DISABLED_FLAG 0x00001000 #define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 From 938e0c7ed3f501cb21be09e0c899bffc8f578cbe Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Mar 2020 20:09:25 +0800 Subject: [PATCH 39/68] f2fs: fix to update f2fs_super_block fields under sb_lock Fields in struct f2fs_super_block should be updated under coverage of sb_lock, fix to adjust update_sb_metadata() for that rule. Fixes: 04f0b2eaa3b3 ("f2fs: ioctl for removing a range from F2FS") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bc203e24a039..f122fe3dbba3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1442,12 +1442,19 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) { struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi); - int section_count = le32_to_cpu(raw_sb->section_count); - int segment_count = le32_to_cpu(raw_sb->segment_count); - int segment_count_main = le32_to_cpu(raw_sb->segment_count_main); - long long block_count = le64_to_cpu(raw_sb->block_count); + int section_count; + int segment_count; + int segment_count_main; + long long block_count; int segs = secs * sbi->segs_per_sec; + down_write(&sbi->sb_lock); + + section_count = le32_to_cpu(raw_sb->section_count); + segment_count = le32_to_cpu(raw_sb->segment_count); + segment_count_main = le32_to_cpu(raw_sb->segment_count_main); + block_count = le64_to_cpu(raw_sb->block_count); + raw_sb->section_count = cpu_to_le32(section_count + secs); raw_sb->segment_count = cpu_to_le32(segment_count + segs); raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); @@ -1461,6 +1468,8 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->devs[last_dev].total_segments = cpu_to_le32(dev_segs + segs); } + + up_write(&sbi->sb_lock); } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) From 2c24a79a6e703738b8702dad7aca9d156c5dd8a7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 9 Mar 2020 13:10:08 -0500 Subject: [PATCH 40/68] f2fs: xattr.h: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 0153b4c9ef21..e471be77f8f0 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -49,7 +49,7 @@ struct f2fs_xattr_entry { __u8 e_name_index; __u8 e_name_len; __le16 e_value_size; /* size of attribute value */ - char e_name[0]; /* attribute name */ + char e_name[]; /* attribute name */ }; #define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) From 7309a89cb652bb9778664efd73df7e2b18b0fdb8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 12 Mar 2020 10:45:29 +0800 Subject: [PATCH 41/68] f2fs: fix to account compressed blocks in f2fs_compressed_blocks() por_fsstress reports inconsistent status in orphan inode, the root cause of this is in f2fs_write_raw_pages() we decrease i_compr_blocks incorrectly due to wrong calculation in f2fs_compressed_blocks(). So this patch exposes below two functions based on __f2fs_cluster_blocks: - f2fs_compressed_blocks: get count of compressed blocks in compressed cluster - f2fs_cluster_blocks: get count of valid blocks (including reserved blocks) in compressed cluster. Then use f2fs_compress_blocks() to get correct compressed blocks count in f2fs_write_raw_pages(). sanity_check_inode: inode (ino=ad80) hash inconsistent i_compr_blocks:2, i_blocks:1, run fsck to fix Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index bd3ea01db448..f9cf75fe4a12 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -535,8 +535,7 @@ static bool __cluster_may_compress(struct compress_ctx *cc) return true; } -/* return # of compressed block addresses */ -static int f2fs_compressed_blocks(struct compress_ctx *cc) +static int __f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) { struct dnode_of_data dn; int ret; @@ -559,8 +558,13 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i); - if (blkaddr != NULL_ADDR) - ret++; + if (compr) { + if (__is_valid_data_blkaddr(blkaddr)) + ret++; + } else { + if (blkaddr != NULL_ADDR) + ret++; + } } } fail: @@ -568,6 +572,18 @@ fail: return ret; } +/* return # of compressed blocks in compressed cluster */ +static int f2fs_compressed_blocks(struct compress_ctx *cc) +{ + return __f2fs_cluster_blocks(cc, true); +} + +/* return # of valid blocks in compressed cluster */ +static int f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) +{ + return __f2fs_cluster_blocks(cc, false); +} + int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { struct compress_ctx cc = { @@ -577,7 +593,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, }; - return f2fs_compressed_blocks(&cc); + return f2fs_cluster_blocks(&cc, false); } static bool cluster_may_compress(struct compress_ctx *cc) @@ -626,7 +642,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, bool prealloc; retry: - ret = f2fs_compressed_blocks(cc); + ret = f2fs_cluster_blocks(cc, false); if (ret <= 0) return ret; From 4176ae4d114ea99238068da90559f4c28bfd4e9d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 18 Mar 2020 19:40:45 +0800 Subject: [PATCH 42/68] f2fs: don't mark compressed inode dirty during f2fs_iget() - f2fs_iget - do_read_inode - set_inode_flag(, FI_COMPRESSED_FILE) - __mark_inode_dirty_flag(, true) It's unnecessary, so let's just mark compressed inode dirty while compressed inode conversion. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c5f9c08b9ee8..36cf5843b15f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2574,7 +2574,6 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: - case FI_COMPRESSED_FILE: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -3836,6 +3835,7 @@ static inline void set_compress_context(struct inode *inode) F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; set_inode_flag(inode, FI_COMPRESSED_FILE); stat_inc_compr_inode(inode); + f2fs_mark_inode_dirty_sync(inode, true); } static inline u64 f2fs_disable_compressed_file(struct inode *inode) @@ -3852,6 +3852,7 @@ static inline u64 f2fs_disable_compressed_file(struct inode *inode) fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); clear_inode_flag(inode, FI_COMPRESSED_FILE); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } From a8238998a7759e3864efb8a4a9ca0e9408ceed5c Mon Sep 17 00:00:00 2001 From: DongDongJu Date: Fri, 20 Mar 2020 15:01:32 +0900 Subject: [PATCH 43/68] f2fs: delete DIO read lock This lock can be a contention with multi 4k random read IO with single inode. example) fio --output=test --name=test --numjobs=60 --filename=/media/samsung960pro/file_test --rw=randread --bs=4k --direct=1 --time_based --runtime=7 --ioengine=libaio --iodepth=256 --group_reporting --size=10G With this commit, it remove that possible lock contention. Signed-off-by: Dongjoo Seo Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 98fbc85df094..ae6e8a3df9be 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3453,7 +3453,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, rw == WRITE ? get_data_block_dio_write : get_data_block_dio, NULL, f2fs_dio_submit_bio, - DIO_LOCKING | DIO_SKIP_HOLES); + rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES : + DIO_SKIP_HOLES); if (do_opu) up_read(&fi->i_gc_rwsem[READ]); From 86e53e02d45ab3d0bff8065bd64eeeb79e9907f4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 20 Mar 2020 18:14:31 +0800 Subject: [PATCH 44/68] f2fs: fix potential deadlock on compressed quota file generic/232 reports below deadlock: fsstress D 0 96980 96969 0x00084000 Call Trace: schedule+0x4a/0xb0 io_schedule+0x12/0x40 __lock_page+0x127/0x1d0 pagecache_get_page+0x1d8/0x250 prepare_compress_overwrite+0xe0/0x490 [f2fs] f2fs_prepare_compress_overwrite+0x5d/0x80 [f2fs] f2fs_write_begin+0x833/0xb90 [f2fs] f2fs_quota_write+0x145/0x1e0 [f2fs] write_blk+0x36/0x80 [quota_tree] do_insert_tree+0x2ac/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] qtree_write_dquot+0x70/0x190 [quota_tree] v2_write_dquot+0x43/0x90 [quota_v2] dquot_acquire+0x77/0x100 f2fs_dquot_acquire+0x2f/0x60 [f2fs] dqget+0x310/0x450 dquot_transfer+0xb2/0x120 f2fs_setattr+0x11a/0x4a0 [f2fs] notify_change+0x349/0x480 chown_common+0x168/0x1c0 do_fchownat+0xbc/0xf0 __x64_sys_lchown+0x21/0x30 do_syscall_64+0x5f/0x220 entry_SYSCALL_64_after_hwframe+0x44/0xa9 task PC stack pid father kworker/u256:0 D 0 103444 2 0x80084000 Workqueue: writeback wb_workfn (flush-251:1) Call Trace: schedule+0x4a/0xb0 schedule_timeout+0x15e/0x2f0 io_schedule_timeout+0x19/0x40 congestion_wait+0x7e/0x120 f2fs_write_multi_pages+0x12a/0x840 [f2fs] f2fs_write_cache_pages+0x48f/0x790 [f2fs] f2fs_write_data_pages+0x2db/0x330 [f2fs] do_writepages+0x1a/0x60 __writeback_single_inode+0x3d/0x340 writeback_sb_inodes+0x225/0x4a0 wb_writeback+0xf7/0x320 wb_workfn+0xba/0x470 process_one_work+0x16c/0x3f0 worker_thread+0x4c/0x440 kthread+0xf8/0x130 ret_from_fork+0x35/0x40 fsstress D 0 5277 5266 0x00084000 Call Trace: schedule+0x4a/0xb0 rwsem_down_write_slowpath+0x29d/0x540 block_operations+0x105/0x360 [f2fs] f2fs_write_checkpoint+0x101/0x1010 [f2fs] f2fs_sync_fs+0xa8/0x130 [f2fs] f2fs_do_sync_file+0x1ad/0x890 [f2fs] do_fsync+0x38/0x60 __x64_sys_fdatasync+0x13/0x20 do_syscall_64+0x5f/0x220 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The root cause is there is potential deadlock between quota data update and writeback. Kworker Thread B Thread C - f2fs_write_cache_pages - lock whole cluster --- A - f2fs_write_multi_pages - f2fs_write_raw_pages - f2fs_write_single_data_page - f2fs_do_write_data_page - f2fs_setattr - f2fs_lock_op --- B - f2fs_write_checkpoint - block_operations - f2fs_lock_all --- B - dquot_transfer - f2fs_quota_write - f2fs_prepare_compress_overwrite - pagecache_get_page --- A - f2fs_trylock_op failed --- B - congestion_wait - goto rewrite To fix this issue, during quota file writeback, just redirty all pages left in cluster rather holding pages' lock in cluster and looping retrying lock cp_rwsem. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index f9cf75fe4a12..00fd6f904139 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1002,6 +1002,15 @@ retry_write: unlock_page(cc->rpages[i]); ret = 0; } else if (ret == -EAGAIN) { + /* + * for quota file, just redirty left pages to + * avoid deadlock caused by cluster update race + * from foreground operation. + */ + if (IS_NOQUOTA(cc->inode)) { + err = 0; + goto out_err; + } ret = 0; cond_resched(); congestion_wait(BLK_RW_ASYNC, @@ -1011,16 +1020,12 @@ retry_write: goto retry_write; } err = ret; - goto out_fail; + goto out_err; } *submitted += _submitted; } return 0; - -out_fail: - /* TODO: revoke partially updated block addresses */ - BUG_ON(compr_blocks); out_err: for (++i; i < cc->cluster_size; i++) { if (!cc->rpages[i]) From 177f29e56d825d8bee34b6ad9fe3a61644c5964d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 20 Mar 2020 18:17:54 +0800 Subject: [PATCH 45/68] f2fs: don't change inode status under page lock In order to shrink page lock coverage. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 331c90556a0f..7c843196b9eb 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -850,12 +850,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, 0); set_page_dirty(page); - dir->i_ctime = dir->i_mtime = current_time(dir); - f2fs_mark_inode_dirty_sync(dir, false); - - if (inode) - f2fs_drop_nlink(dir, inode); - if (bit_pos == NR_DENTRY_IN_BLOCK && !f2fs_truncate_hole(dir, page->index, page->index + 1)) { f2fs_clear_radix_tree_dirty_tag(page); @@ -867,6 +861,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_remove_dirty_inode(dir); } f2fs_put_page(page, 1); + + dir->i_ctime = dir->i_mtime = current_time(dir); + f2fs_mark_inode_dirty_sync(dir, false); + + if (inode) + f2fs_drop_nlink(dir, inode); } bool f2fs_empty_dir(struct inode *dir) From c29342a7366b625c9e221499f808e09047f7cd0a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2020 19:57:57 +0800 Subject: [PATCH 46/68] f2fs: fix to avoid potential deadlock We should always check F2FS_I(inode)->cp_task condition in prior to other conditions in __should_serialize_io() to avoid deadloop described in commit 040d2bb318d1 ("f2fs: fix to avoid deadloop if data_flush is on"), however we break this rule when we support compression, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ae6e8a3df9be..65a135e87b5f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2962,15 +2962,17 @@ next: static inline bool __should_serialize_io(struct inode *inode, struct writeback_control *wbc) { - if (!S_ISREG(inode->i_mode)) - return false; - if (f2fs_compressed_file(inode)) - return true; - if (IS_NOQUOTA(inode)) - return false; /* to avoid deadlock in path of data flush */ if (F2FS_I(inode)->cp_task) return false; + + if (!S_ISREG(inode->i_mode)) + return false; + if (IS_NOQUOTA(inode)) + return false; + + if (f2fs_compressed_file(inode)) + return true; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) From 6363e99bdd96b173ffdb61b0f371a7fdf8ef9135 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 Mar 2020 20:19:33 +0800 Subject: [PATCH 47/68] f2fs: clean up f2fs_may_encrypt() Merge below two conditions into f2fs_may_encrypt() for cleanup - IS_ENCRYPTED() - DUMMY_ENCRYPTION_ENABLED() Check IS_ENCRYPTED(inode) condition in f2fs_init_inode_metadata() is enough since we have already set encrypt flag in f2fs_new_inode(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 4 +--- fs/f2fs/f2fs.h | 13 +++++++++---- fs/f2fs/namei.c | 4 +--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7c843196b9eb..227bf48abdfd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -471,7 +471,6 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, struct page *dpage) { struct page *page; - int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir)); int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { @@ -498,8 +497,7 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - if ((IS_ENCRYPTED(dir) || dummy_encrypt) && - f2fs_may_encrypt(inode)) { + if (IS_ENCRYPTED(inode)) { err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto put_error; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 36cf5843b15f..6714675b5b82 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3934,15 +3934,20 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } -static inline bool f2fs_may_encrypt(struct inode *inode) +static inline bool f2fs_may_encrypt(struct inode *dir, struct inode *inode) { #ifdef CONFIG_FS_ENCRYPTION + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); umode_t mode = inode->i_mode; - return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); -#else - return false; + /* + * If the directory encrypted or dummy encryption enabled, + * then we should encrypt the inode. + */ + if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) + return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)); #endif + return false; } static inline bool f2fs_may_compress(struct inode *inode) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b75c70813f9e..95cfbce062e8 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -75,9 +75,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_inode_flag(inode, FI_NEW_INODE); - /* If the directory encrypted, then we should encrypt the inode. */ - if ((IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) && - f2fs_may_encrypt(inode)) + if (f2fs_may_encrypt(dir, inode)) f2fs_set_encrypted_inode(inode); if (f2fs_sb_has_extra_attr(sbi)) { From 161dc59687d98bdd218a57f47a71304bbb41cd4c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2020 19:58:00 +0800 Subject: [PATCH 48/68] f2fs: fix NULL pointer dereference in f2fs_write_begin() BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:f2fs_write_begin+0x823/0xb90 [f2fs] Call Trace: f2fs_quota_write+0x139/0x1d0 [f2fs] write_blk+0x36/0x80 [quota_tree] get_free_dqblk+0x42/0xa0 [quota_tree] do_insert_tree+0x235/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] do_insert_tree+0x26e/0x4a0 [quota_tree] qtree_write_dquot+0x70/0x190 [quota_tree] v2_write_dquot+0x43/0x90 [quota_v2] dquot_acquire+0x77/0x100 f2fs_dquot_acquire+0x2f/0x60 [f2fs] dqget+0x310/0x450 dquot_transfer+0x7e/0x120 f2fs_setattr+0x11a/0x4a0 [f2fs] notify_change+0x349/0x480 chown_common+0x168/0x1c0 do_fchownat+0xbc/0xf0 __x64_sys_fchownat+0x20/0x30 do_syscall_64+0x5f/0x220 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Passing fsdata parameter to .write_{begin,end} in f2fs_quota_write(), so that if quota file is compressed one, we can avoid above NULL pointer dereference when updating quota content. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 110668b71c22..5ae256fd8825 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1937,6 +1937,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, int offset = off & (sb->s_blocksize - 1); size_t towrite = len; struct page *page; + void *fsdata = NULL; char *kaddr; int err = 0; int tocopy; @@ -1946,7 +1947,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, towrite); retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, - &page, NULL); + &page, &fsdata); if (unlikely(err)) { if (err == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, @@ -1963,7 +1964,7 @@ retry: flush_dcache_page(page); a_ops->write_end(NULL, mapping, off, tocopy, tocopy, - page, NULL); + page, fsdata); offset = 0; towrite -= tocopy; off += tocopy; From b5979af64b103a758d7d1ebf8fd9caf7145f7d76 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Mar 2020 19:57:58 +0800 Subject: [PATCH 49/68] f2fs: don't trigger data flush in foreground operation Data flush can generate heavy IO and cause long latency during flush, so it's not appropriate to trigger it in foreground operation. And also, we may face below potential deadlock during data flush: - f2fs_write_multi_pages - f2fs_write_raw_pages - f2fs_write_single_data_page - f2fs_balance_fs - f2fs_balance_fs_bg - f2fs_sync_dirty_inodes - filemap_fdatawrite -- stuck on flush same cluster Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6714675b5b82..4fd058400f46 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3237,7 +3237,7 @@ void f2fs_drop_inmem_pages(struct inode *inode); void f2fs_drop_inmem_page(struct inode *inode, struct page *page); int f2fs_commit_inmem_pages(struct inode *inode); void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); -void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg); int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f122fe3dbba3..26248c8936db 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -113,7 +113,7 @@ do_gc: prefree_segments(sbi), free_segments(sbi)); /* balancing f2fs's metadata periodically */ - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, true); next: sb_end_write(sbi->sb); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6ad8f6f8ad89..29d4ee7e6ea0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1976,7 +1976,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, goto skip_write; /* balancing f2fs's metadata in background */ - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, true); /* collect a number of dirty node pages and write together */ if (wbc->sync_mode != WB_SYNC_ALL && diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 776636faba67..1065b19afe17 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -496,7 +496,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) /* balance_fs_bg is able to be pending */ if (need && excess_cached_nats(sbi)) - f2fs_balance_fs_bg(sbi); + f2fs_balance_fs_bg(sbi, false); if (!f2fs_is_checkpoint_ready(sbi)) return; @@ -511,7 +511,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } } -void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) { if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return; @@ -540,7 +540,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) || f2fs_time_over(sbi, CP_TIME)) { - if (test_opt(sbi, DATA_FLUSH)) { + if (test_opt(sbi, DATA_FLUSH) && from_bg) { struct blk_plug plug; mutex_lock(&sbi->flush_lock); From cde00bd8c4da05937678c465cb89fbac16706121 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 Mar 2020 20:23:27 +0800 Subject: [PATCH 50/68] f2fs: don't call fscrypt_get_encryption_info() explicitly in f2fs_tmpfile() In f2fs_tmpfile(), parent inode's encryption info is only used when inheriting encryption context to its child inode, however, we have already called fscrypt_get_encryption_info() in fscrypt_inherit_context() to get the encryption info, so just removing unneeded one in f2fs_tmpfile(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 95cfbce062e8..f54119da2217 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -874,12 +874,6 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - if (IS_ENCRYPTED(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) { - int err = fscrypt_get_encryption_info(dir); - if (err) - return err; - } - return __f2fs_tmpfile(dir, dentry, mode, NULL); } From 43fb2655f536382fe68781366edad6140aee8c34 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 Mar 2020 20:24:11 +0800 Subject: [PATCH 51/68] f2fs: fix to clear PG_error if fsverity failed In f2fs_decompress_end_io(), we should clear PG_error flag before page unlock, otherwise reread will fail due to the flag as described in commit fb7d70db305a ("f2fs: clear PageError on the read path"). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 00fd6f904139..d66d372d104d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1187,15 +1187,17 @@ void f2fs_decompress_end_io(struct page **rpages, if (!rpage) continue; - if (err || PageError(rpage)) { - ClearPageUptodate(rpage); - ClearPageError(rpage); - } else { - if (!verity || fsverity_verify_page(rpage)) - SetPageUptodate(rpage); - else - SetPageError(rpage); + if (err || PageError(rpage)) + goto clear_uptodate; + + if (!verity || fsverity_verify_page(rpage)) { + SetPageUptodate(rpage); + goto unlock; } +clear_uptodate: + ClearPageUptodate(rpage); + ClearPageError(rpage); +unlock: unlock_page(rpage); } } From 3f66ccc7703651e974b53d36991e08f07dd2c252 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2020 17:43:04 +0800 Subject: [PATCH 52/68] f2fs: fix NULL pointer dereference in f2fs_verity_work() If both compression and fsverity feature is on, generic/572 will report below NULL pointer dereference bug. BUG: kernel NULL pointer dereference, address: 0000000000000018 RIP: 0010:f2fs_verity_work+0x60/0x90 [f2fs] #PF: supervisor read access in kernel mode Workqueue: fsverity_read_queue f2fs_verity_work [f2fs] RIP: 0010:f2fs_verity_work+0x60/0x90 [f2fs] Call Trace: process_one_work+0x16c/0x3f0 worker_thread+0x4c/0x440 ? rescuer_thread+0x350/0x350 kthread+0xf8/0x130 ? kthread_unpark+0x70/0x70 ret_from_fork+0x35/0x40 There are two issue in f2fs_verity_work(): - it needs to traverse and verify all pages in bio. - if pages in bio belong to non-compressed cluster, accessing decompress IO context stored in page private will cause NULL pointer dereference. Fix them. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 ++ fs/f2fs/data.c | 35 ++++++++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d66d372d104d..adac6fcb2830 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -477,6 +477,8 @@ out_vunmap_cbuf: out_vunmap_rbuf: vunmap(dic->rbuf); out_free_dic: + if (verity) + refcount_add(dic->nr_cpages - 1, &dic->ref); if (!verity) f2fs_decompress_end_io(dic->rpages, dic->cluster_size, ret, false); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 65a135e87b5f..0c6107ca524e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -187,12 +187,37 @@ static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size) static void f2fs_verify_bio(struct bio *bio) { - struct page *page = bio_first_page_all(bio); - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); + struct bio_vec *bv; + int i; - f2fs_verify_pages(dic->rpages, dic->cluster_size); - f2fs_free_dic(dic); + bio_for_each_segment_all(bv, bio, i) { + struct page *page = bv->bv_page; + struct decompress_io_ctx *dic; + + dic = (struct decompress_io_ctx *)page_private(page); + + if (dic) { + if (refcount_dec_not_one(&dic->ref)) + continue; + f2fs_verify_pages(dic->rpages, + dic->cluster_size); + f2fs_free_dic(dic); + continue; + } + + if (bio->bi_status || PageError(page)) + goto clear_uptodate; + + if (fsverity_verify_page(page)) { + SetPageUptodate(page); + goto unlock; + } +clear_uptodate: + ClearPageUptodate(page); + ClearPageError(page); +unlock: + unlock_page(page); + } } #endif From 3f80cfee4ad5658c1150d2bdee9a50fac8b7828a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2020 11:18:07 +0800 Subject: [PATCH 53/68] f2fs: fix potential .flags overflow on 32bit architecture f2fs_inode_info.flags is unsigned long variable, it has 32 bits in 32bit architecture, since we introduced FI_MMAP_FILE flag when we support data compression, we may access memory cross the border of .flags field, corrupting .i_sem field, result in below deadlock. To fix this issue, let's expand .flags as an array to grab enough space to store new flags. Call Trace: __schedule+0x8d0/0x13fc ? mark_held_locks+0xac/0x100 schedule+0xcc/0x260 rwsem_down_write_slowpath+0x3ab/0x65d down_write+0xc7/0xe0 f2fs_drop_nlink+0x3d/0x600 [f2fs] f2fs_delete_inline_entry+0x300/0x440 [f2fs] f2fs_delete_entry+0x3a1/0x7f0 [f2fs] f2fs_unlink+0x500/0x790 [f2fs] vfs_unlink+0x211/0x490 do_unlinkat+0x483/0x520 sys_unlink+0x4a/0x70 do_fast_syscall_32+0x12b/0x683 entry_SYSENTER_32+0xaa/0x102 Fixes: 4c8ff7095bef ("f2fs: support data compression") Tested-by: Ondrej Jirman Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 99 ++++++++++++++++++++++++------------------------- fs/f2fs/inode.c | 2 +- 2 files changed, 50 insertions(+), 51 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4fd058400f46..61500edaa580 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -678,6 +678,44 @@ enum { MAX_GC_FAILURE }; +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_AUTO_RECOVER, /* indicate inode is recoverable */ + FI_DIRTY_DIR, /* indicate directory has dirty pages */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ + FI_FREE_NID, /* free allocated nide */ + FI_NO_EXTENT, /* not to use the extent cache */ + FI_INLINE_XATTR, /* used for inline xattr */ + FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ + FI_APPEND_WRITE, /* inode has appended data */ + FI_UPDATE_WRITE, /* inode has in-place-update data */ + FI_NEED_IPU, /* used for ipu per file */ + FI_ATOMIC_FILE, /* indicate atomic file */ + FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ + FI_VOLATILE_FILE, /* indicate volatile file */ + FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ + FI_DROP_CACHE, /* drop dirty page cache */ + FI_DATA_EXIST, /* indicate data exists */ + FI_INLINE_DOTS, /* indicate inline dot dentries */ + FI_DO_DEFRAG, /* indicate defragment is running */ + FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ + FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_HOT_DATA, /* indicate file is hot */ + FI_EXTRA_ATTR, /* indicate file has extra attribute */ + FI_PROJ_INHERIT, /* indicate file inherits projectid */ + FI_PIN_FILE, /* indicate file should not be gced */ + FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ + FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ + FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_MMAP_FILE, /* indicate file was mmapped */ + FI_MAX, /* max flag, never be used */ +}; + struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ unsigned long i_flags; /* keep an inode flags for ioctl */ @@ -690,7 +728,7 @@ struct f2fs_inode_info { umode_t i_acl_mode; /* keep file acl mode temporarily */ /* Use below internally in f2fs*/ - unsigned long flags; /* use to pass per-file flags */ + unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ struct rw_semaphore i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ @@ -2523,43 +2561,6 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) return flags & F2FS_OTHER_FLMASK; } -/* used for f2fs_inode_info->flags */ -enum { - FI_NEW_INODE, /* indicate newly allocated inode */ - FI_DIRTY_INODE, /* indicate inode is dirty or not */ - FI_AUTO_RECOVER, /* indicate inode is recoverable */ - FI_DIRTY_DIR, /* indicate directory has dirty pages */ - FI_INC_LINK, /* need to increment i_nlink */ - FI_ACL_MODE, /* indicate acl mode */ - FI_NO_ALLOC, /* should not allocate any blocks */ - FI_FREE_NID, /* free allocated nide */ - FI_NO_EXTENT, /* not to use the extent cache */ - FI_INLINE_XATTR, /* used for inline xattr */ - FI_INLINE_DATA, /* used for inline data*/ - FI_INLINE_DENTRY, /* used for inline dentry */ - FI_APPEND_WRITE, /* inode has appended data */ - FI_UPDATE_WRITE, /* inode has in-place-update data */ - FI_NEED_IPU, /* used for ipu per file */ - FI_ATOMIC_FILE, /* indicate atomic file */ - FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ - FI_VOLATILE_FILE, /* indicate volatile file */ - FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ - FI_DROP_CACHE, /* drop dirty page cache */ - FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ - FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ - FI_HOT_DATA, /* indicate file is hot */ - FI_EXTRA_ATTR, /* indicate file has extra attribute */ - FI_PROJ_INHERIT, /* indicate file inherits projectid */ - FI_PIN_FILE, /* indicate file should not be gced */ - FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ - FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ - FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ - FI_MMAP_FILE, /* indicate file was mmapped */ -}; - static inline void __mark_inode_dirty_flag(struct inode *inode, int flag, bool set) { @@ -2580,20 +2581,18 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, static inline void set_inode_flag(struct inode *inode, int flag) { - if (!test_bit(flag, &F2FS_I(inode)->flags)) - set_bit(flag, &F2FS_I(inode)->flags); + test_and_set_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, true); } static inline int is_inode_flag_set(struct inode *inode, int flag) { - return test_bit(flag, &F2FS_I(inode)->flags); + return test_bit(flag, F2FS_I(inode)->flags); } static inline void clear_inode_flag(struct inode *inode, int flag) { - if (test_bit(flag, &F2FS_I(inode)->flags)) - clear_bit(flag, &F2FS_I(inode)->flags); + test_and_clear_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, false); } @@ -2684,19 +2683,19 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) struct f2fs_inode_info *fi = F2FS_I(inode); if (ri->i_inline & F2FS_INLINE_XATTR) - set_bit(FI_INLINE_XATTR, &fi->flags); + set_bit(FI_INLINE_XATTR, fi->flags); if (ri->i_inline & F2FS_INLINE_DATA) - set_bit(FI_INLINE_DATA, &fi->flags); + set_bit(FI_INLINE_DATA, fi->flags); if (ri->i_inline & F2FS_INLINE_DENTRY) - set_bit(FI_INLINE_DENTRY, &fi->flags); + set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) - set_bit(FI_DATA_EXIST, &fi->flags); + set_bit(FI_DATA_EXIST, fi->flags); if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, &fi->flags); + set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) - set_bit(FI_EXTRA_ATTR, &fi->flags); + set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) - set_bit(FI_PIN_FILE, &fi->flags); + set_bit(FI_PIN_FILE, fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2c3078f4381b..be6ac33461d1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -362,7 +362,7 @@ static int do_read_inode(struct inode *inode) fi->i_flags = le32_to_cpu(ri->i_flags); if (S_ISREG(inode->i_mode)) fi->i_flags &= ~F2FS_PROJINHERIT_FL; - fi->flags = 0; + bitmap_zero(fi->flags, FI_MAX); fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; From 419f5233596fa74f172e808bc373e85689f1e82a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 24 Mar 2020 14:20:57 +0800 Subject: [PATCH 54/68] f2fs: fix to avoid double unlock On image that has verity and compression feature, if compressed pages and non-compressed pages are mixed in one bio, we may double unlock non-compressed page in below flow: - f2fs_post_read_work - f2fs_decompress_work - f2fs_decompress_bio - __read_end_io - unlock_page - fsverity_enqueue_verify_work - f2fs_verity_work - f2fs_verify_bio - unlock_page So it should skip handling non-compressed page in f2fs_decompress_work() if verity is on. Besides, add missing dec_page_count() in f2fs_verify_bio(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0c6107ca524e..d03e5681e5d8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -139,6 +139,8 @@ static void __read_end_io(struct bio *bio, bool compr, bool verity) f2fs_decompress_pages(bio, page, verity); continue; } + if (verity) + continue; #endif /* PG_error was set if any post_read step failed */ @@ -216,6 +218,7 @@ clear_uptodate: ClearPageUptodate(page); ClearPageError(page); unlock: + dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } } From 0049e2ec3c32af9eef176f7b560fc0b99bec825d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 28 Mar 2020 19:27:36 +0800 Subject: [PATCH 55/68] f2fs: xattr.h: Make stub helpers inline Fix gcc warnings: In file included from fs/f2fs/dir.c:15:0: fs/f2fs/xattr.h:157:13: warning: 'f2fs_destroy_xattr_caches' defined but not used [-Wunused-function] static void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } ^~~~~~~~~~~~~~~~~~~~~~~~~ fs/f2fs/xattr.h:156:12: warning: 'f2fs_init_xattr_caches' defined but not used [-Wunused-function] static int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } Reported-by: Hulk Robot Fixes: a999150f4fe3 ("f2fs: use kmem_cache pool during inline xattr lookups") Signed-off-by: YueHaibing Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index e471be77f8f0..938fcd20565d 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -153,8 +153,8 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, { return -EOPNOTSUPP; } -static int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } -static void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } +static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } #endif #ifdef CONFIG_F2FS_FS_SECURITY From 221397fb937a7dda0bffa10ad050fc2fc67963ad Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Mar 2020 18:03:15 +0800 Subject: [PATCH 56/68] f2fs: fix to use f2fs_readpage_limit() in f2fs_read_multi_pages() Multipage read flow should consider fsverity, so it needs to use f2fs_readpage_limit() instead of i_size_read() to check EOF condition. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d03e5681e5d8..b67d1e8ce65f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2048,7 +2048,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); - last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + last_block_in_file = (f2fs_readpage_limit(inode) + + blocksize - 1) >> blkbits; /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { From 60fb6dd25378dd56e9327b22cbe0705a927d1ec6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Mar 2020 17:33:23 +0800 Subject: [PATCH 57/68] f2fs: clean up {cic,dic}.ref handling {cic,dic}.ref should be initialized to number of compressed pages, let's initialize it directly rather than doing w/ f2fs_set_compressed_page(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index adac6fcb2830..fde4af20c523 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -52,7 +52,7 @@ bool f2fs_is_compressed_page(struct page *page) } static void f2fs_set_compressed_page(struct page *page, - struct inode *inode, pgoff_t index, void *data, refcount_t *r) + struct inode *inode, pgoff_t index, void *data) { SetPagePrivate(page); set_page_private(page, (unsigned long)data); @@ -60,8 +60,6 @@ static void f2fs_set_compressed_page(struct page *page, /* i_crypto_info and iv index */ page->index = index; page->mapping = inode->i_mapping; - if (r) - refcount_inc(r); } static void f2fs_put_compressed_page(struct page *page) @@ -478,7 +476,7 @@ out_vunmap_rbuf: vunmap(dic->rbuf); out_free_dic: if (verity) - refcount_add(dic->nr_cpages - 1, &dic->ref); + refcount_set(&dic->ref, dic->nr_cpages); if (!verity) f2fs_decompress_end_io(dic->rpages, dic->cluster_size, ret, false); @@ -834,7 +832,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; cic->inode = inode; - refcount_set(&cic->ref, 1); + refcount_set(&cic->ref, cc->nr_cpages); cic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << cc->log_cluster_size, GFP_NOFS); if (!cic->rpages) @@ -844,8 +842,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->nr_cpages; i++) { f2fs_set_compressed_page(cc->cpages[i], inode, - cc->rpages[i + 1]->index, - cic, i ? &cic->ref : NULL); + cc->rpages[i + 1]->index, cic); fio.compressed_page = cc->cpages[i]; if (fio.encrypted) { fio.page = cc->rpages[i + 1]; @@ -1095,7 +1092,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; dic->inode = cc->inode; - refcount_set(&dic->ref, 1); + refcount_set(&dic->ref, cc->nr_cpages); dic->cluster_idx = cc->cluster_idx; dic->cluster_size = cc->cluster_size; dic->log_cluster_size = cc->log_cluster_size; @@ -1119,8 +1116,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) goto out_free; f2fs_set_compressed_page(page, cc->inode, - start_idx + i + 1, - dic, i ? &dic->ref : NULL); + start_idx + i + 1, dic); dic->cpages[i] = page; } From b2a95f00bbe4dd23aab9672f69ebaf9880a96373 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Mar 2020 20:50:05 +0800 Subject: [PATCH 58/68] f2fs: change default compression algorithm Use LZ4 as default compression algorithm, as compared to LZO, it shows almost the same compression ratio and much better decompression speed. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5ae256fd8825..765d18e22aa4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1578,7 +1578,7 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).test_dummy_encryption = false; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; F2FS_OPTION(sbi).compress_ext_cnt = 0; F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; From 577ae6d57da179643bc5a2843b22167d04686959 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Mar 2020 16:57:06 +0800 Subject: [PATCH 59/68] f2fs: compress: fix to call missing destroy_compress_ctx() Otherwise, it will cause memory leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index fde4af20c523..1e5bbb9c1d3e 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -396,6 +396,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->cpages[i] = NULL; } + cops->destroy_compress_ctx(cc); + cc->nr_cpages = nr_cpages; trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, From 9a18d5f17e293c39a6279a721f68a2cf237d7df6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Mar 2020 16:57:07 +0800 Subject: [PATCH 60/68] f2fs: compress: add .{init,destroy}_decompress_ctx callback Add below two callback interfaces in struct f2fs_compress_ops: int (*init_decompress_ctx)(struct decompress_io_ctx *dic); void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic); Which will be used by zstd compress algorithm later. In addition, this patch adds callback function pointer check, so that specified algorithm can avoid defining unneeded functions. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 1e5bbb9c1d3e..13666735f567 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -20,6 +20,8 @@ struct f2fs_compress_ops { int (*init_compress_ctx)(struct compress_ctx *cc); void (*destroy_compress_ctx)(struct compress_ctx *cc); int (*compress_pages)(struct compress_ctx *cc); + int (*init_decompress_ctx)(struct decompress_io_ctx *dic); + void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic); int (*decompress_pages)(struct decompress_io_ctx *dic); }; @@ -332,9 +334,11 @@ static int f2fs_compress_pages(struct compress_ctx *cc) trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx, cc->cluster_size, fi->i_compress_algorithm); - ret = cops->init_compress_ctx(cc); - if (ret) - goto out; + if (cops->init_compress_ctx) { + ret = cops->init_compress_ctx(cc); + if (ret) + goto out; + } max_len = COMPRESS_HEADER_SIZE + cc->clen; cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); @@ -396,7 +400,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->cpages[i] = NULL; } - cops->destroy_compress_ctx(cc); + if (cops->destroy_compress_ctx) + cops->destroy_compress_ctx(cc); cc->nr_cpages = nr_cpages; @@ -416,7 +421,8 @@ out_free_cpages: kfree(cc->cpages); cc->cpages = NULL; destroy_compress_ctx: - cops->destroy_compress_ctx(cc); + if (cops->destroy_compress_ctx) + cops->destroy_compress_ctx(cc); out: trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, cc->clen, ret); @@ -450,10 +456,16 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) goto out_free_dic; } + if (cops->init_decompress_ctx) { + ret = cops->init_decompress_ctx(dic); + if (ret) + goto out_free_dic; + } + dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL); if (!dic->rbuf) { ret = -ENOMEM; - goto out_free_dic; + goto destroy_decompress_ctx; } dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO); @@ -476,6 +488,9 @@ out_vunmap_cbuf: vunmap(dic->cbuf); out_vunmap_rbuf: vunmap(dic->rbuf); +destroy_decompress_ctx: + if (cops->destroy_decompress_ctx) + cops->destroy_decompress_ctx(dic); out_free_dic: if (verity) refcount_set(&dic->ref, dic->nr_cpages); From 735528dbf5cff7694bb0e51d0a69ea7313eb1a53 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 3 Mar 2020 17:46:02 +0800 Subject: [PATCH 61/68] f2fs: compress: support zstd compress algorithm Add zstd compress algorithm support, use "compress_algorithm=zstd" mountoption to enable it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 4 +- fs/f2fs/Kconfig | 9 ++ fs/f2fs/compress.c | 165 +++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 5 + fs/f2fs/super.c | 7 ++ include/trace/events/f2fs.h | 3 +- 6 files changed, 190 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 4eb3e2ddd00e..b1a66cf0e967 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -235,8 +235,8 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "en hide up to all remaining free space. The actual space that would be unusable can be viewed at /sys/fs/f2fs//unusable This space is reclaimed once checkpoint=enable. -compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo" - and "lz4" algorithm. +compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo", + "lz4" and "zstd" algorithm. compress_log_size=%u Support configuring compress cluster size, the size will be 4KB * (1 << %u), 16KB is minimum size, also it's default size. diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 2dd1bbca241c..79eed58d9d69 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -126,3 +126,12 @@ config F2FS_FS_LZ4 default y help Support LZ4 compress algorithm, if unsure, say Y. + +config F2FS_FS_ZSTD + bool "ZSTD compression support" + depends on F2FS_FS_COMPRESSION + select ZSTD_COMPRESS + select ZSTD_DECOMPRESS + default y + help + Support ZSTD compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 13666735f567..c7284dd7d52f 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -291,6 +292,165 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = { }; #endif +#ifdef CONFIG_F2FS_FS_ZSTD +#define F2FS_ZSTD_DEFAULT_CLEVEL 1 + +static int zstd_init_compress_ctx(struct compress_ctx *cc) +{ + ZSTD_parameters params; + ZSTD_CStream *stream; + void *workspace; + unsigned int workspace_size; + + params = ZSTD_getParams(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen, 0); + workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams); + + workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + workspace_size, GFP_NOFS); + if (!workspace) + return -ENOMEM; + + stream = ZSTD_initCStream(params, 0, workspace, workspace_size); + if (!stream) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream failed\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + __func__); + kvfree(workspace); + return -EIO; + } + + cc->private = workspace; + cc->private2 = stream; + + cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE; + return 0; +} + +static void zstd_destroy_compress_ctx(struct compress_ctx *cc) +{ + kvfree(cc->private); + cc->private = NULL; + cc->private2 = NULL; +} + +static int zstd_compress_pages(struct compress_ctx *cc) +{ + ZSTD_CStream *stream = cc->private2; + ZSTD_inBuffer inbuf; + ZSTD_outBuffer outbuf; + int src_size = cc->rlen; + int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE; + int ret; + + inbuf.pos = 0; + inbuf.src = cc->rbuf; + inbuf.size = src_size; + + outbuf.pos = 0; + outbuf.dst = cc->cbuf->cdata; + outbuf.size = dst_size; + + ret = ZSTD_compressStream(stream, &outbuf, &inbuf); + if (ZSTD_isError(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + __func__, ZSTD_getErrorCode(ret)); + return -EIO; + } + + ret = ZSTD_endStream(stream, &outbuf); + if (ZSTD_isError(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + __func__, ZSTD_getErrorCode(ret)); + return -EIO; + } + + cc->clen = outbuf.pos; + return 0; +} + +static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) +{ + ZSTD_DStream *stream; + void *workspace; + unsigned int workspace_size; + + workspace_size = ZSTD_DStreamWorkspaceBound(MAX_COMPRESS_WINDOW_SIZE); + + workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode), + workspace_size, GFP_NOFS); + if (!workspace) + return -ENOMEM; + + stream = ZSTD_initDStream(MAX_COMPRESS_WINDOW_SIZE, + workspace, workspace_size); + if (!stream) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, + __func__); + kvfree(workspace); + return -EIO; + } + + dic->private = workspace; + dic->private2 = stream; + + return 0; +} + +static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic) +{ + kvfree(dic->private); + dic->private = NULL; + dic->private2 = NULL; +} + +static int zstd_decompress_pages(struct decompress_io_ctx *dic) +{ + ZSTD_DStream *stream = dic->private2; + ZSTD_inBuffer inbuf; + ZSTD_outBuffer outbuf; + int ret; + + inbuf.pos = 0; + inbuf.src = dic->cbuf->cdata; + inbuf.size = dic->clen; + + outbuf.pos = 0; + outbuf.dst = dic->rbuf; + outbuf.size = dic->rlen; + + ret = ZSTD_decompressStream(stream, &outbuf, &inbuf); + if (ZSTD_isError(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, + __func__, ZSTD_getErrorCode(ret)); + return -EIO; + } + + if (dic->rlen != outbuf.pos) { + printk_ratelimited("%sF2FS-fs (%s): %s ZSTD invalid rlen:%zu, " + "expected:%lu\n", KERN_ERR, + F2FS_I_SB(dic->inode)->sb->s_id, + __func__, dic->rlen, + PAGE_SIZE << dic->log_cluster_size); + return -EIO; + } + + return 0; +} + +static const struct f2fs_compress_ops f2fs_zstd_ops = { + .init_compress_ctx = zstd_init_compress_ctx, + .destroy_compress_ctx = zstd_destroy_compress_ctx, + .compress_pages = zstd_compress_pages, + .init_decompress_ctx = zstd_init_decompress_ctx, + .destroy_decompress_ctx = zstd_destroy_decompress_ctx, + .decompress_pages = zstd_decompress_pages, +}; +#endif + static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #ifdef CONFIG_F2FS_FS_LZO &f2fs_lzo_ops, @@ -302,6 +462,11 @@ static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #else NULL, #endif +#ifdef CONFIG_F2FS_FS_ZSTD + &f2fs_zstd_ops, +#else + NULL, +#endif }; bool f2fs_is_compress_backend_ready(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 61500edaa580..1a0991053b5f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1267,6 +1267,7 @@ enum fsync_mode { enum compress_algorithm_type { COMPRESS_LZO, COMPRESS_LZ4, + COMPRESS_ZSTD, COMPRESS_MAX, }; @@ -1296,6 +1297,7 @@ struct compress_ctx { size_t rlen; /* valid data length in rbuf */ size_t clen; /* valid data length in cbuf */ void *private; /* payload buffer for specified compression algorithm */ + void *private2; /* extra payload buffer */ }; /* compress context for write IO path */ @@ -1325,11 +1327,14 @@ struct decompress_io_ctx { size_t clen; /* valid data length in cbuf */ refcount_t ref; /* referrence count of compressed page */ bool failed; /* indicate IO error during decompression */ + void *private; /* payload buffer for specified decompression algorithm */ + void *private2; /* extra payload buffer */ }; #define NULL_CLUSTER ((unsigned int)(~0)) #define MIN_COMPRESS_LOG_SIZE 2 #define MAX_COMPRESS_LOG_SIZE 8 +#define MAX_COMPRESS_WINDOW_SIZE ((PAGE_SIZE) << MAX_COMPRESS_LOG_SIZE) struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 765d18e22aa4..1f90dbf6d493 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -829,6 +829,10 @@ static int parse_options(struct super_block *sb, char *options) !strcmp(name, "lz4")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; + } else if (strlen(name) == 4 && + !strcmp(name, "zstd")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_ZSTD; } else { kfree(name); return -EINVAL; @@ -1427,6 +1431,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, case COMPRESS_LZ4: algtype = "lz4"; break; + case COMPRESS_ZSTD: + algtype = "zstd"; + break; } seq_printf(seq, ",compress_algorithm=%s", algtype); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ad989c7cba90..ada9d991d283 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -163,7 +163,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE); #define show_compress_algorithm(type) \ __print_symbolic(type, \ { COMPRESS_LZO, "LZO" }, \ - { COMPRESS_LZ4, "LZ4" }) + { COMPRESS_LZ4, "LZ4" }, \ + { COMPRESS_ZSTD, "ZSTD" }) struct f2fs_sb_info; struct f2fs_io_info; From 178fea745de7f8bffe22ac85e089e07a365f1698 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Mar 2020 17:25:07 +0800 Subject: [PATCH 62/68] f2fs: clean up dic->tpages assignment Just cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c7284dd7d52f..ca4f54fbbd2e 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1308,20 +1308,16 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) goto out_free; for (i = 0; i < dic->cluster_size; i++) { - if (cc->rpages[i]) + if (cc->rpages[i]) { + dic->tpages[i] = cc->rpages[i]; continue; + } dic->tpages[i] = f2fs_grab_page(); if (!dic->tpages[i]) goto out_free; } - for (i = 0; i < dic->cluster_size; i++) { - if (dic->tpages[i]) - continue; - dic->tpages[i] = cc->rpages[i]; - } - return dic; out_free: From 7334507aa07f21dbee9706cd9ed06e5ffea32c39 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Mar 2020 10:22:09 +0800 Subject: [PATCH 63/68] f2fs: show compression in statx fstest reports below message when compression is on: generic/424 1s ... - output mismatch --- tests/generic/424.out +++ results/generic/424.out.bad @@ -1,2 +1,26 @@ QA output created by 424 +[!] Attribute compressed should be set +Failed +stat_test failed +[!] Attribute compressed should be set +Failed +stat_test failed We missed to set STATX_ATTR_COMPRESSED on compressed inode in getattr(), fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 93180850558d..db2b3e6674d9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -799,6 +799,8 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, } flags = fi->i_flags; + if (flags & F2FS_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; if (flags & F2FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; if (IS_ENCRYPTED(inode)) @@ -810,7 +812,8 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, if (IS_VERITY(inode)) stat->attributes |= STATX_ATTR_VERITY; - stat->attributes_mask |= (STATX_ATTR_APPEND | + stat->attributes_mask |= (STATX_ATTR_COMPRESSED | + STATX_ATTR_APPEND | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP | From ea49be2aa01b07e15b11b4c06ff5c6a73619d5ea Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 26 Mar 2020 17:42:26 +0800 Subject: [PATCH 64/68] f2fs: fix to verify tpage before releasing in f2fs_free_dic() In below error path, tpages[i] could be NULL, fix to check it before releasing it. - f2fs_read_multi_pages - f2fs_alloc_dic - f2fs_free_dic Fixes: 61fbae2b2b12 ("f2fs: fix to avoid NULL pointer dereference") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index ca4f54fbbd2e..df7b2d15eacd 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1333,6 +1333,8 @@ void f2fs_free_dic(struct decompress_io_ctx *dic) for (i = 0; i < dic->cluster_size; i++) { if (dic->rpages[i]) continue; + if (!dic->tpages[i]) + continue; unlock_page(dic->tpages[i]); put_page(dic->tpages[i]); } From 55a8a8446f113560a296772722732023b3830913 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 26 Mar 2020 17:43:56 +0800 Subject: [PATCH 65/68] f2fs: switch discard_policy.timeout to bool type While checking discard timeout, we use specified type UMOUNT_DISCARD_TIMEOUT, so just replace doplicy.timeout with it, and switch doplicy.timeout to bool type. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1a0991053b5f..c9a91bd2e1e4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -330,8 +330,8 @@ struct discard_policy { bool io_aware; /* issue discard in idle time */ bool sync; /* submit discard with REQ_SYNC flag */ bool ordered; /* issue discard by lba order */ + bool timeout; /* discard timeout for put_super */ unsigned int granularity; /* discard granularity */ - int timeout; /* discard timeout for put_super */ }; struct discard_cmd_control { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1065b19afe17..2158f27fc701 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1080,7 +1080,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; dpolicy->io_aware_gran = MAX_PLIST_NUM; - dpolicy->timeout = 0; + dpolicy->timeout = false; if (discard_type == DPOLICY_BG) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; @@ -1105,6 +1105,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->io_aware = false; /* we need to issue all to keep CP_TRIMMED_FLAG */ dpolicy->granularity = 1; + dpolicy->timeout = true; } } @@ -1473,12 +1474,12 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, int i, issued = 0; bool io_interrupted = false; - if (dpolicy->timeout != 0) - f2fs_update_time(sbi, dpolicy->timeout); + if (dpolicy->timeout) + f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { - if (dpolicy->timeout != 0 && - f2fs_time_over(sbi, dpolicy->timeout)) + if (dpolicy->timeout && + f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; if (i + 1 < dpolicy->granularity) @@ -1499,8 +1500,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); - if (dpolicy->timeout != 0 && - f2fs_time_over(sbi, dpolicy->timeout)) + if (dpolicy->timeout && + f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; if (dpolicy->io_aware && i < dpolicy->io_aware_gran && @@ -1679,7 +1680,6 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); - dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT; __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); From 60a370f9eb0a0fa7fe914f3fa1ec6aca07ff80bc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Mar 2020 18:29:00 +0800 Subject: [PATCH 66/68] f2fs: add missing CONFIG_F2FS_FS_COMPRESSION Compression sysfs node should not be shown if f2fs module disables compression feature. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 145e1d95b13b..1a4be654f208 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -580,7 +580,9 @@ F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); #endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); +#ifdef CONFIG_F2FS_FS_COMPRESSION F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); +#endif #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -661,7 +663,9 @@ static struct attribute *f2fs_feat_attrs[] = { #endif ATTR_LIST(sb_checksum), ATTR_LIST(casefold), +#ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compression), +#endif NULL, }; From e776a15de388b18d0543c29b0d024b34d83d597d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Mar 2020 18:29:51 +0800 Subject: [PATCH 67/68] f2fs: fix to disable compression on directory It needs to call f2fs_disable_compressed_file() to disable compression on directory. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 10 ++++++---- fs/f2fs/file.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c9a91bd2e1e4..32d7d48e4518 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3848,10 +3848,12 @@ static inline u64 f2fs_disable_compressed_file(struct inode *inode) if (!f2fs_compressed_file(inode)) return 0; - if (get_dirty_pages(inode)) - return 1; - if (fi->i_compr_blocks) - return fi->i_compr_blocks; + if (S_ISREG(inode->i_mode)) { + if (get_dirty_pages(inode)) + return 1; + if (fi->i_compr_blocks) + return fi->i_compr_blocks; + } fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index db2b3e6674d9..80ce584e1d29 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1818,7 +1818,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) } if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { - if (S_ISREG(inode->i_mode) && (masked_flags & F2FS_COMPR_FL)) { + if (masked_flags & F2FS_COMPR_FL) { if (f2fs_disable_compressed_file(inode)) return -EINVAL; } From cb4336ea93ee48676d76cf2382f0ba6b9dd20c4e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Mar 2020 18:29:52 +0800 Subject: [PATCH 68/68] f2fs: keep inline_data when compression conversion We can keep compressed inode's data inline before inline conversion. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 80ce584e1d29..82c00c933d94 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1825,11 +1825,6 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (iflags & F2FS_NOCOMP_FL) return -EINVAL; if (iflags & F2FS_COMPR_FL) { - int err = f2fs_convert_inline_inode(inode); - - if (err) - return err; - if (!f2fs_may_compress(inode)) return -EINVAL;