From 1591748418b242ddff4838a51f8fd21c94329b30 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 18 Sep 2018 20:39:53 +0800 Subject: [PATCH 01/45] f2fs: avoid GC causing encrypted file corrupted The encrypted file may be corrupted by GC in following case: Time 1: | segment 1 blkaddr = A | GC -> | segment 2 blkaddr = B | Encrypted block 1 is moved from blkaddr A of segment 1 to blkaddr B of segment 2, Time 2: | segment 1 blkaddr = B | GC -> | segment 3 blkaddr = C | Before page 1 is written back and if segment 2 become a victim, then page 1 is moved from blkaddr B of segment 2 to blkaddr Cof segment 3, during the GC process of Time 2, f2fs should wait for page 1 written back before reading it, or move_data_block will read a garbage block from blkaddr B since page is not written back to blkaddr B yet. Commit 6aa58d8a ("f2fs: readahead encrypted block during GC") introduce ra_data_block to read encrypted block, but it forgets to add f2fs_wait_on_page_writeback to avoid racing between GC and flush. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a07241fb8537..c96e7c6354ef 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -658,6 +658,14 @@ got_it: fio.page = page; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; + /* + * don't cache encrypted data into meta inode until previous dirty + * data were writebacked to avoid racing between GC and flush. + */ + f2fs_wait_on_page_writeback(page, DATA, true); + + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi), dn.data_blkaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); @@ -745,6 +753,8 @@ static int move_data_block(struct inode *inode, block_t bidx, */ f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); if (err) goto put_out; From 9d94d677c2f49cf7a96d98d63c6b7d3c59fa5786 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Oct 2018 17:24:10 +0800 Subject: [PATCH 02/45] f2fs: fix to account preflush command for noflush_merge mode Previously, we only account preflush command for flush_merge mode, so for noflush_merge mode, we can not know in-flight preflush command count, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6edcf8391dd3..d161f5a8b3d6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -620,7 +620,9 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) return 0; if (!test_opt(sbi, FLUSH_MERGE)) { + atomic_inc(&fcc->issing_flush); ret = submit_flush_wait(sbi, ino); + atomic_dec(&fcc->issing_flush); atomic_inc(&fcc->issued_flush); return ret; } From 0864616aabee9abb0046df352cd346b9e22a130f Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 24 Oct 2018 16:08:30 +0800 Subject: [PATCH 03/45] f2fs: fix count of seg_freed to make sec_freed correct When sbi->segs_per_sec > 1, and if some segno has 0 valid blocks before gc starts, do_garbage_collect will skip counting seg_freed++, and this will cause seg_freed < sbi->segs_per_sec and finally skip sec_freed++. Signed-off-by: Yunlong Song Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c96e7c6354ef..9707773fdaac 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1140,9 +1140,9 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, GET_SUM_BLOCK(sbi, segno)); f2fs_put_page(sum_page, 0); - if (get_valid_blocks(sbi, segno, false) == 0 || - !PageUptodate(sum_page) || - unlikely(f2fs_cp_error(sbi))) + if (get_valid_blocks(sbi, segno, false) == 0) + goto freed; + if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) goto next; sum = page_address(sum_page); @@ -1170,6 +1170,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, stat_inc_seg_count(sbi, type, gc_type); +freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; From 74a52615c92854ed7f26a9802b1cb13fad41ad37 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 24 Oct 2018 16:09:42 +0800 Subject: [PATCH 04/45] f2fs: remove codes of unused wio_mutex Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - fs/f2fs/super.c | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 56204a8f8a12..4dfa28ddd56c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1170,7 +1170,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ - struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; /* bio ordering for NODE/DATA */ /* keep migration IO order for LFS mode */ struct rw_semaphore io_order_lock; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index af58b2cc21b8..2d18de5e7c3f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2674,7 +2674,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) static void init_sb_info(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = sbi->raw_super; - int i, j; + int i; sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); @@ -2710,9 +2710,6 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - for (i = 0; i < NR_PAGE_TYPE - 1; i++) - for (j = HOT; j < NR_TEMP_TYPE; j++) - mutex_init(&sbi->wio_mutex[i][j]); init_rwsem(&sbi->io_order_lock); spin_lock_init(&sbi->cp_lock); From d912f2965de6e49e262441233544c0d0a4917d00 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Oct 2018 18:34:26 +0800 Subject: [PATCH 05/45] f2fs: clean up f2fs_sb_has_##feature_name In F2FS_HAS_FEATURE(), we will use F2FS_SB(sb) to get sbi pointer to access .raw_super field, to avoid unneeded pointer conversion, this patch changes to F2FS_HAS_FEATURE() accept sbi parameter directly. Just do cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 23 ++++++++++---------- fs/f2fs/file.c | 13 +++++------ fs/f2fs/inode.c | 18 +++++++-------- fs/f2fs/namei.c | 6 ++--- fs/f2fs/node.c | 6 ++--- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 52 ++++++++++++++++++++++---------------------- fs/f2fs/sysfs.c | 20 ++++++++--------- 10 files changed, 73 insertions(+), 73 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9c28ea439e0b..3a25cf22d732 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1465,7 +1465,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) * invalidate intermediate page cache borrowed from meta inode * which are used for migration of encrypted inode's blocks. */ - if (f2fs_sb_has_encrypt(sbi->sb)) + if (f2fs_sb_has_encrypt(sbi)) invalidate_mapping_pages(META_MAPPING(sbi), MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4dfa28ddd56c..d35161ed86b2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -152,12 +152,13 @@ struct f2fs_mount_info { #define F2FS_FEATURE_VERITY 0x0400 /* reserved */ #define F2FS_FEATURE_SB_CHKSUM 0x0800 -#define F2FS_HAS_FEATURE(sb, mask) \ - ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) -#define F2FS_SET_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)) -#define F2FS_CLEAR_FEATURE(sb, mask) \ - (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)) +#define __F2FS_HAS_FEATURE(raw_super, mask) \ + ((raw_super->feature & cpu_to_le32(mask)) != 0) +#define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask) +#define F2FS_SET_FEATURE(sbi, mask) \ + (sbi->raw_super->feature |= cpu_to_le32(mask)) +#define F2FS_CLEAR_FEATURE(sbi, mask) \ + (sbi->raw_super->feature &= ~cpu_to_le32(mask)) /* * Default values for user and/or group using reserved blocks @@ -3458,9 +3459,9 @@ static inline bool f2fs_post_read_required(struct inode *inode) } #define F2FS_FEATURE_FUNCS(name, flagname) \ -static inline int f2fs_sb_has_##name(struct super_block *sb) \ +static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ { \ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \ + return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \ } F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); @@ -3490,7 +3491,7 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi, static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) { - return f2fs_sb_has_blkzoned(sbi->sb); + return f2fs_sb_has_blkzoned(sbi); } static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) @@ -3565,7 +3566,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. */ - if (f2fs_sb_has_blkzoned(sbi->sb)) + if (f2fs_sb_has_blkzoned(sbi)) return true; if (test_opt(sbi, LFS) && (rw == WRITE) && block_unaligned_IO(inode, iocb, iter)) @@ -3588,7 +3589,7 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) { #ifdef CONFIG_QUOTA - if (f2fs_sb_has_quota_ino(sbi->sb)) + if (f2fs_sb_has_quota_ino(sbi)) return true; if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 88b124677189..82f144052fce 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -696,7 +696,7 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, unsigned int flags; if (f2fs_has_extra_attr(inode) && - f2fs_sb_has_inode_crtime(inode->i_sb) && + f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { stat->result_mask |= STATX_BTIME; stat->btime.tv_sec = fi->i_crtime.tv_sec; @@ -2030,7 +2030,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -2040,7 +2040,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) + if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp)))) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -2051,7 +2051,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_encrypt(inode->i_sb)) + if (!f2fs_sb_has_encrypt(sbi)) return -EOPNOTSUPP; err = mnt_want_write_file(filp); @@ -2635,12 +2635,11 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct super_block *sb = sbi->sb; struct page *ipage; kprojid_t kprojid; int err; - if (!f2fs_sb_has_project_quota(sb)) { + if (!f2fs_sb_has_project_quota(sbi)) { if (projid != F2FS_DEF_PROJID) return -EOPNOTSUPP; else @@ -2757,7 +2756,7 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags & F2FS_FL_USER_VISIBLE); - if (f2fs_sb_has_project_quota(inode->i_sb)) + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, fi->i_projid); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 91ceee0ed4c4..f63d4fe6156d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -118,7 +118,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page { struct f2fs_inode *ri = &F2FS_NODE(page)->i; - if (!f2fs_sb_has_inode_chksum(sbi->sb)) + if (!f2fs_sb_has_inode_chksum(sbi)) return false; if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR)) @@ -218,7 +218,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) + if (f2fs_sb_has_flexible_inline_xattr(sbi) && !f2fs_has_extra_attr(inode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, @@ -228,7 +228,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } if (f2fs_has_extra_attr(inode) && - !f2fs_sb_has_extra_attr(sbi->sb)) { + !f2fs_sb_has_extra_attr(sbi)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, "%s: inode (ino=%lx) is with extra_attr, " @@ -340,7 +340,7 @@ static int do_read_inode(struct inode *inode) fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size); } else if (f2fs_has_inline_xattr(inode) || f2fs_has_inline_dentry(inode)) { @@ -390,14 +390,14 @@ static int do_read_inode(struct inode *inode) if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) i_projid = (projid_t)le32_to_cpu(ri->i_projid); else i_projid = F2FS_DEF_PROJID; fi->i_projid = make_kprojid(&init_user_ns, i_projid); - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) && + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi) && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime); fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); @@ -542,11 +542,11 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode)) { ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); - if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb)) + if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode))) ri->i_inline_xattr_size = cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_projid)) { projid_t i_projid; @@ -556,7 +556,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_projid = cpu_to_le32(i_projid); } - if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_crtime)) { ri->i_crtime = diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 99299ede7429..2d5626939893 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -61,7 +61,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) goto fail; } - if (f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_sb_has_project_quota(sbi) && (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; else @@ -79,7 +79,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); - if (f2fs_sb_has_extra_attr(sbi->sb)) { + if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } @@ -92,7 +92,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); if (f2fs_has_inline_xattr(inode)) xattr_size = F2FS_OPTION(sbi).inline_xattr_size; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2b34206486d8..904f4b97a1aa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2563,17 +2563,17 @@ retry: if (dst->i_inline & F2FS_EXTRA_ATTR) { dst->i_extra_isize = src->i_extra_isize; - if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) && + if (f2fs_sb_has_flexible_inline_xattr(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_inline_xattr_size)) dst->i_inline_xattr_size = src->i_inline_xattr_size; - if (f2fs_sb_has_project_quota(sbi->sb) && + if (f2fs_sb_has_project_quota(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; - if (f2fs_sb_has_inode_crtime(sbi->sb) && + if (f2fs_sb_has_inode_crtime(sbi) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_crtime_nsec)) { dst->i_crtime = src->i_crtime; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 1dfb17f9f9ff..c22f9d0011ba 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -250,7 +250,7 @@ static int recover_inode(struct inode *inode, struct page *page) i_gid_write(inode, le32_to_cpu(raw->i_gid)); if (raw->i_inline & F2FS_EXTRA_ATTR) { - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), i_projid)) { projid_t i_projid; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d161f5a8b3d6..f1a57f54e749 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1736,7 +1736,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sbi->sb) && + if (f2fs_sb_has_blkzoned(sbi) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif @@ -1948,7 +1948,7 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_has_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi) || (force && len < cpc->trim_minlen)) goto skip; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2d18de5e7c3f..b7121314e91e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -259,7 +259,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quota options when quota turned on"); return -EINVAL; } - if (f2fs_sb_has_quota_ino(sb)) { + if (f2fs_sb_has_quota_ino(sbi)) { f2fs_msg(sb, KERN_INFO, "QUOTA feature is enabled, so ignore qf_name"); return 0; @@ -314,7 +314,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) * 'grpquota' mount options are allowed even without quota feature * to support legacy quotas in quota files. */ - if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi->sb)) { + if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " "Cannot enable project quota enforcement."); return -1; @@ -348,7 +348,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) } } - if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) { + if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_INFO, "QUOTA feature is enabled, so ignore jquota_fmt"); F2FS_OPTION(sbi).s_jquota_fmt = 0; @@ -417,7 +417,7 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, DISCARD); break; case Opt_nodiscard: - if (f2fs_sb_has_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_WARNING, "discard is required for zoned block devices"); return -EINVAL; @@ -566,7 +566,7 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - if (f2fs_sb_has_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_WARNING, "adaptive mode is not allowed with " "zoned block device feature"); @@ -758,7 +758,7 @@ static int parse_options(struct super_block *sb, char *options) break; case Opt_test_dummy_encryption: #ifdef CONFIG_F2FS_FS_ENCRYPTION - if (!f2fs_sb_has_encrypt(sb)) { + if (!f2fs_sb_has_encrypt(sbi)) { f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); return -EINVAL; } @@ -799,13 +799,13 @@ static int parse_options(struct super_block *sb, char *options) if (f2fs_check_quota_options(sbi)) return -EINVAL; #else - if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_msg(sbi->sb, KERN_INFO, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); return -EINVAL; } - if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) { + if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_msg(sb, KERN_ERR, "Filesystem with project quota feature cannot be " "mounted RDWR without CONFIG_QUOTA"); @@ -821,8 +821,8 @@ static int parse_options(struct super_block *sb, char *options) } if (test_opt(sbi, INLINE_XATTR_SIZE)) { - if (!f2fs_sb_has_extra_attr(sb) || - !f2fs_sb_has_flexible_inline_xattr(sb)) { + if (!f2fs_sb_has_extra_attr(sbi) || + !f2fs_sb_has_flexible_inline_xattr(sbi)) { f2fs_msg(sb, KERN_ERR, "extra_attr or flexible_inline_xattr " "feature is off"); @@ -1431,7 +1431,7 @@ static void default_options(struct f2fs_sb_info *sbi) sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); - if (f2fs_sb_has_blkzoned(sbi->sb)) + if (f2fs_sb_has_blkzoned(sbi)) set_opt_mode(sbi, F2FS_MOUNT_LFS); else set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); @@ -1575,7 +1575,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~SB_RDONLY; if (sb_any_quota_suspended(sb)) { dquot_resume(sb, -1); - } else if (f2fs_sb_has_quota_ino(sb)) { + } else if (f2fs_sb_has_quota_ino(sbi)) { err = f2fs_enable_quotas(sb); if (err) goto restore_opts; @@ -1817,7 +1817,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) int enabled = 0; int i, err; - if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) { + if (f2fs_sb_has_quota_ino(sbi) && rdonly) { err = f2fs_enable_quotas(sbi->sb); if (err) { f2fs_msg(sbi->sb, KERN_ERR, @@ -1848,7 +1848,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, unsigned long qf_inum; int err; - BUG_ON(!f2fs_sb_has_quota_ino(sb)); + BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb))); qf_inum = f2fs_qf_ino(sb, type); if (!qf_inum) @@ -1993,7 +1993,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) goto out_put; err = dquot_quota_off(sb, type); - if (err || f2fs_sb_has_quota_ino(sb)) + if (err || f2fs_sb_has_quota_ino(F2FS_SB(sb))) goto out_put; inode_lock(inode); @@ -2173,7 +2173,7 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, * if LOST_FOUND feature is enabled. * */ - if (f2fs_sb_has_lost_found(sbi->sb) && + if (f2fs_sb_has_lost_found(sbi) && inode->i_ino == F2FS_ROOT_INO(sbi)) return -EPERM; @@ -2396,7 +2396,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, __u32 crc = 0; /* Check checksum_offset and crc in superblock */ - if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) { + if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) { crc_offset = le32_to_cpu(raw_super->checksum_offset); if (crc_offset != offsetof(struct f2fs_super_block, crc)) { @@ -2746,7 +2746,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) unsigned int n = 0; int err = -EIO; - if (!f2fs_sb_has_blkzoned(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi)) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != @@ -2877,7 +2877,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* we should update superblock crc here */ - if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) { + if (!recover && f2fs_sb_has_sb_chksum(sbi)) { crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), offsetof(struct f2fs_super_block, crc)); F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); @@ -2965,7 +2965,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_has_blkzoned(sbi->sb)) { + !f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sbi->sb, KERN_ERR, "Zoned block device feature not enabled\n"); return -EINVAL; @@ -3061,7 +3061,7 @@ try_onemore: sbi->raw_super = raw_super; /* precompute checksum seed for metadata */ - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, sizeof(raw_super->uuid)); @@ -3071,7 +3071,7 @@ try_onemore: * devices, but mandatory for host-managed zoned block devices. */ #ifndef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_has_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sbi)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); err = -EOPNOTSUPP; @@ -3098,13 +3098,13 @@ try_onemore: #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - if (f2fs_sb_has_quota_ino(sb)) + if (f2fs_sb_has_quota_ino(sbi)) sb->s_qcop = &dquot_quotactl_sysfile_ops; else sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; - if (f2fs_sb_has_quota_ino(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi)) { for (i = 0; i < MAXQUOTAS; i++) { if (f2fs_qf_ino(sbi->sb, i)) sbi->nquota_files++; @@ -3294,7 +3294,7 @@ try_onemore: #ifdef CONFIG_QUOTA /* Enable quota usage during mount */ - if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) f2fs_msg(sb, KERN_ERR, @@ -3389,7 +3389,7 @@ skip_recovery: free_meta: #ifdef CONFIG_QUOTA f2fs_truncate_quota_inode_pages(sb); - if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) + if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif /* diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index b777cbdd796b..240e4881279e 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -90,34 +90,34 @@ static ssize_t features_show(struct f2fs_attr *a, if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); - if (f2fs_sb_has_encrypt(sb)) + if (f2fs_sb_has_encrypt(sbi)) len += snprintf(buf, PAGE_SIZE - len, "%s", "encryption"); - if (f2fs_sb_has_blkzoned(sb)) + if (f2fs_sb_has_blkzoned(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); - if (f2fs_sb_has_extra_attr(sb)) + if (f2fs_sb_has_extra_attr(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "extra_attr"); - if (f2fs_sb_has_project_quota(sb)) + if (f2fs_sb_has_project_quota(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "projquota"); - if (f2fs_sb_has_inode_chksum(sb)) + if (f2fs_sb_has_inode_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); - if (f2fs_sb_has_flexible_inline_xattr(sb)) + if (f2fs_sb_has_flexible_inline_xattr(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); - if (f2fs_sb_has_quota_ino(sb)) + if (f2fs_sb_has_quota_ino(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "quota_ino"); - if (f2fs_sb_has_inode_crtime(sb)) + if (f2fs_sb_has_inode_crtime(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); - if (f2fs_sb_has_lost_found(sb)) + if (f2fs_sb_has_lost_found(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); - if (f2fs_sb_has_sb_chksum(sb)) + if (f2fs_sb_has_sb_chksum(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "sb_checksum"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); From b9898bb40cc060df58609bdb4b99e91282f4741c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Oct 2018 18:37:26 +0800 Subject: [PATCH 06/45] f2fs: introduce __is_large_section() for cleanup Introduce a wrapper __is_large_section() to clean up codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 4 ++-- fs/f2fs/segment.c | 16 ++++++++-------- fs/f2fs/segment.h | 2 +- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 139b4d5c83d5..e327eefdbc02 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -197,7 +197,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d35161ed86b2..e4538a87a80b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2762,6 +2762,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_unlock(&sbi->iostat_lock); } +#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) + #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \ (!is_read_io(fio->op) || fio->is_meta)) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 82f144052fce..a114eacc0d7e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2560,7 +2560,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) return -EFAULT; if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || - sbi->segs_per_sec != 1) { + __is_large_section(sbi)) { f2fs_msg(sbi->sb, KERN_WARNING, "Can't flush %u in %d for segs_per_sec %u != 1\n", range.dev_num, sbi->s_ndevs, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9707773fdaac..84f49cb3147c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1109,7 +1109,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, int submitted = 0; /* readahead multi ssa blocks those have contiguous address */ - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec, META_SSA, true); @@ -1318,7 +1318,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; /* give warm/cold data area from slower device */ - if (sbi->s_ndevs && sbi->segs_per_sec == 1) + if (sbi->s_ndevs && !__is_large_section(sbi)) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f1a57f54e749..204d31e58967 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1884,7 +1884,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason & CP_DISCARD); - bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1; + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); mutex_lock(&dirty_i->seglist_lock); @@ -1916,7 +1916,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, (end - 1) <= cpc->trim_end) continue; - if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { + if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -2148,7 +2148,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* update total number of valid blocks to be written in ckpt area */ SIT_I(sbi)->written_valid_blocks += del; - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, segno)->valid_blocks += del; } @@ -2414,7 +2414,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) { /* if segs_per_sec is large than 1, we need to keep original policy. */ - if (sbi->segs_per_sec != 1) + if (__is_large_section(sbi)) return CURSEG_I(sbi, type)->segno; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) @@ -2724,7 +2724,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) struct discard_policy dpolicy; unsigned long long trimmed = 0; int err = 0; - bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1; + bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi); if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; @@ -3882,7 +3882,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) if (!sit_i->tmp_map) return -ENOMEM; - if (sbi->segs_per_sec > 1) { + if (__is_large_section(sbi)) { sit_i->sec_entries = f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry), MAIN_SECS(sbi)), @@ -4037,7 +4037,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se->valid_blocks; } - if (sbi->segs_per_sec > 1) + if (__is_large_section(sbi)) get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; } @@ -4081,7 +4081,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sbi->discard_blks -= se->valid_blocks; } - if (sbi->segs_per_sec > 1) { + if (__is_large_section(sbi)) { get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; get_sec_entry(sbi, start)->valid_blocks -= diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index ab3465faddf1..a77f76f528b6 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -333,7 +333,7 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, * In order to get # of valid blocks in a section instantly from many * segments, f2fs manages two counting structures separately. */ - if (use_section && sbi->segs_per_sec > 1) + if (use_section && __is_large_section(sbi)) return get_sec_entry(sbi, segno)->valid_blocks; else return get_seg_entry(sbi, segno)->valid_blocks; From 29ac5942161b861064ce4dff2ea4d0d36461c4a1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Oct 2018 18:37:27 +0800 Subject: [PATCH 07/45] f2fs: support subsectional garbage collection Section is minimal garbage collection unit of f2fs, in zoned block device, or ancient block mapping flash device, in order to improve GC efficiency, we can align GC unit to lower device erase unit, normally, it consists of multiple of segments. Once background or foreground GC triggers, it brings a large number of IOs, which will impact user IO, and also occupy cpu/memory resource intensively. So, to reduce impact of GC on large size section, this patch supports subsectional GC, in one cycle of GC, it only migrate partial segment{s} in victim section. Currently, by default, we use sbi->segs_per_sec as migration granularity. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/gc.c | 39 +++++++++++++++++++++++++++++++++------ fs/f2fs/super.c | 3 +++ 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e4538a87a80b..dda1bad4fe5e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1263,6 +1263,7 @@ struct f2fs_sb_info { struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ + unsigned int next_victim_seg[2]; /* next segment in victim section */ /* for skip statistic */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ @@ -1272,6 +1273,8 @@ struct f2fs_sb_info { /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; + /* migration granularity of garbage collection, unit: segment */ + unsigned int migration_granularity; /* * for stat information. diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 84f49cb3147c..8606ebf509cb 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -333,6 +333,22 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (p.max_search == 0) goto out; + if (__is_large_section(sbi) && p.alloc_mode == LFS) { + if (sbi->next_victim_seg[BG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[BG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + goto got_result; + } + if (gc_type == FG_GC && + sbi->next_victim_seg[FG_GC] != NULL_SEGNO) { + p.min_segno = sbi->next_victim_seg[FG_GC]; + *result = p.min_segno; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + goto got_result; + } + } + last_victim = sm->last_victim[p.gc_mode]; if (p.alloc_mode == LFS && gc_type == FG_GC) { p.min_segno = check_bg_victims(sbi); @@ -395,6 +411,8 @@ next: } if (p.min_segno != NULL_SEGNO) { got_it: + *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; +got_result: if (p.alloc_mode == LFS) { secno = GET_SEC_FROM_SEG(sbi, p.min_segno); if (gc_type == FG_GC) @@ -402,7 +420,6 @@ got_it: else set_bit(secno, dirty_i->victim_secmap); } - *result = (p.min_segno / p.ofs_unit) * p.ofs_unit; trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, sbi->cur_victim_sec, @@ -1103,15 +1120,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; - int seg_freed = 0; + int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; int submitted = 0; + if (__is_large_section(sbi)) + end_segno = rounddown(end_segno, sbi->segs_per_sec); + /* readahead multi ssa blocks those have contiguous address */ if (__is_large_section(sbi)) f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), - sbi->segs_per_sec, META_SSA, true); + end_segno - segno, META_SSA, true); /* reference all summary page */ while (segno < end_segno) { @@ -1142,8 +1162,11 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, if (get_valid_blocks(sbi, segno, false) == 0) goto freed; + if (__is_large_section(sbi) && + migrated >= sbi->migration_granularity) + goto skip; if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) - goto next; + goto skip; sum = page_address(sum_page); if (type != GET_SUM_TYPE((&sum->footer))) { @@ -1151,7 +1174,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, "type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); - goto next; + goto skip; } /* @@ -1174,7 +1197,11 @@ freed: if (gc_type == FG_GC && get_valid_blocks(sbi, segno, false) == 0) seg_freed++; -next: + migrated++; + + if (__is_large_section(sbi) && segno + 1 < end_segno) + sbi->next_victim_seg[gc_type] = segno + 1; +skip: f2fs_put_page(sum_page, 0); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b7121314e91e..12a7cb0eaa73 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2692,7 +2692,10 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; + sbi->migration_granularity = sbi->segs_per_sec; sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; From 5ae4d1bb93e2141a3cf0c017d2c286430cc5c1c7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 25 Oct 2018 16:19:28 +0800 Subject: [PATCH 08/45] f2fs: export migration_granularity sysfs entry Add one sysfs entry to control migration granularity of GC in large section f2fs, it can be tuned to mitigate heavy overhead of migrating huge number of blocks in large section. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 9 +++++++++ fs/f2fs/sysfs.c | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 3ac41774ad3c..a7ce33199457 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -92,6 +92,15 @@ Contact: "Jaegeuk Kim" Description: Controls the number of trials to find a victim segment. +What: /sys/fs/f2fs//migration_granularity +Date: October 2018 +Contact: "Chao Yu" +Description: + Controls migration granularity of garbage collection on large + section, it can let GC move partial segment{s} of one section + in one GC cycle, so that dispersing heavy overhead GC to + multiple lightweight one. + What: /sys/fs/f2fs//dir_level Date: March 2014 Contact: "Jaegeuk Kim" diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 240e4881279e..0575edbe3ed6 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -246,6 +246,11 @@ out: return count; } + if (!strcmp(a->attr.name, "migration_granularity")) { + if (t == 0 || t > sbi->segs_per_sec) + return -EINVAL; + } + if (!strcmp(a->attr.name, "trim_sections")) return -EINVAL; @@ -406,6 +411,7 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); @@ -460,6 +466,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), ATTR_LIST(max_victim_search), + ATTR_LIST(migration_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), From 8c7d937eca6f121de24c96b19f3e5893ad398aa0 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 30 Oct 2018 20:37:55 +0800 Subject: [PATCH 09/45] f2fs: change segment to section in f2fs_ioc_gc_range f2fs_ioc_gc_range skips blocks_per_seg each time, however, f2fs_gc moves blocks of section each time, so fix it from segment to section. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a114eacc0d7e..c25228a1593c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2155,7 +2155,7 @@ do_more: } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); - range.start += sbi->blocks_per_seg; + range.start += BLKS_PER_SEC(sbi); if (range.start <= end) goto do_more; out: From 4a73d3fa1a9302243d42f501b294f3bee5ecdee9 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 5 Nov 2018 09:41:48 -0500 Subject: [PATCH 10/45] f2fs: Change to use DEFINE_SHOW_ATTRIBUTE macro Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index e327eefdbc02..0ba662315d93 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -444,18 +444,7 @@ static int stat_show(struct seq_file *s, void *v) return 0; } -static int stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, stat_show, inode->i_private); -} - -static const struct file_operations stat_fops = { - .owner = THIS_MODULE, - .open = stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(stat); int f2fs_build_stats(struct f2fs_sb_info *sbi) { From fed5a77f74a2316ec424c2c7f98350bdc753dd83 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 6 Nov 2018 10:25:29 +0800 Subject: [PATCH 11/45] f2fs: move dir data flush to write checkpoint process This patch move dir data flush to write checkpoint process, by doing this, it may reduce some time for dir fsync. pre: -f2fs_do_sync_file enter -file_write_and_wait_range <- flush & wait -write_checkpoint -do_checkpoint <- wait all -f2fs_do_sync_file exit now: -f2fs_do_sync_file enter -write_checkpoint -block_operations <- flush dir & no wait -do_checkpoint <- wait all -f2fs_do_sync_file exit Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c25228a1593c..439d340dbe1a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -216,6 +216,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, trace_f2fs_sync_file_enter(inode); + if (S_ISDIR(inode->i_mode)) + goto go_write; + /* if fdatasync is triggered, let's do in-place-update */ if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) set_inode_flag(inode, FI_NEED_IPU); From 68731d7ec6cc4c9673f35e70623276fa3bb4d158 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 12 Nov 2018 00:46:46 +0800 Subject: [PATCH 12/45] f2fs: add to account direct IO This patch adds f2fs_dio_submit_bio() to hook submit_io/end_io functions in direct IO path, in order to account DIO. Later, we will add this count into is_idle() to let background GC/Discard thread be aware of DIO. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/f2fs/debug.c | 4 ++++ fs/f2fs/f2fs.h | 10 ++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 106f116466bf..70450b547034 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2548,6 +2548,53 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter, return 0; } +static void f2fs_dio_end_io(struct bio *bio) +{ + struct f2fs_private_dio *dio = bio->bi_private; + + dec_page_count(F2FS_I_SB(dio->inode), + dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + bio->bi_private = dio->orig_private; + bio->bi_end_io = dio->orig_end_io; + + kfree(dio); + + bio_endio(bio); +} + +static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode, + loff_t file_offset) +{ + struct f2fs_private_dio *dio; + bool write = (bio_op(bio) == REQ_OP_WRITE); + int err; + + dio = f2fs_kzalloc(F2FS_I_SB(inode), + sizeof(struct f2fs_private_dio), GFP_NOFS); + if (!dio) { + err = -ENOMEM; + goto out; + } + + dio->inode = inode; + dio->orig_end_io = bio->bi_end_io; + dio->orig_private = bio->bi_private; + dio->write = write; + + bio->bi_end_io = f2fs_dio_end_io; + bio->bi_private = dio; + + inc_page_count(F2FS_I_SB(inode), + write ? F2FS_DIO_WRITE : F2FS_DIO_READ); + + submit_bio(bio); + return; +out: + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); +} + static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct address_space *mapping = iocb->ki_filp->f_mapping; @@ -2594,7 +2641,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) down_read(&fi->i_gc_rwsem[READ]); } - err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); + err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, get_data_block_dio, NULL, f2fs_dio_submit_bio, + DIO_LOCKING | DIO_SKIP_HOLES); if (do_opu) up_read(&fi->i_gc_rwsem[READ]); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0ba662315d93..1f1230f690ec 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -53,6 +53,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); + si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ); + si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); @@ -374,6 +376,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - DIO (R: %4d, W: %4d)\n", + si->nr_dio_read, si->nr_dio_write); seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dda1bad4fe5e..778879daab04 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -957,6 +957,8 @@ enum count_type { F2FS_RD_DATA, F2FS_RD_NODE, F2FS_RD_META, + F2FS_DIO_WRITE, + F2FS_DIO_READ, NR_COUNT_TYPE, }; @@ -1333,6 +1335,13 @@ struct f2fs_sb_info { __u32 s_chksum_seed; }; +struct f2fs_private_dio { + struct inode *inode; + void *orig_private; + bio_end_io_t *orig_end_io; + bool write; +}; + #ifdef CONFIG_F2FS_FAULT_INJECTION #define f2fs_show_injection_info(type) \ printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ @@ -3152,6 +3161,7 @@ struct f2fs_stat_info { int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; int nr_rd_data, nr_rd_node, nr_rd_meta; + int nr_dio_read, nr_dio_write; unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; From 812f776aac25b64f1e865e77d0f3cf18955166fd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 12 Nov 2018 00:55:44 +0800 Subject: [PATCH 13/45] f2fs: fix to be aware discard/preflush/dio command in is_idle() This patch adds missing in-flight discard/preflush/dio command count check in is_idle(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 778879daab04..039664c7e530 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2158,7 +2158,11 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || - get_pages(sbi, F2FS_WB_CP_DATA)) + get_pages(sbi, F2FS_WB_CP_DATA) || + get_pages(sbi, F2FS_DIO_READ) || + get_pages(sbi, F2FS_DIO_WRITE) || + atomic_read(&SM_I(sbi)->dcc_info->issing_discard) || + atomic_read(&SM_I(sbi)->fcc_info->issing_flush)) return false; return f2fs_time_over(sbi, type); } From 3d6163130878c51e496f9c8be416ca8013840e87 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Nov 2018 14:33:45 +0800 Subject: [PATCH 14/45] f2fs: fix out-place-update DIO write In get_more_blocks(), we may override @create as below code: create = dio->op == REQ_OP_WRITE; if (dio->flags & DIO_SKIP_HOLES) { if (fs_startblk <= ((i_size_read(dio->inode) - 1) >> i_blkbits)) create = 0; } But in f2fs_map_blocks(), we only trigger f2fs_balance_fs() if @create is 1, so in LFS mode, dio overwrite under LFS mode can easily run out of free segments, result in below panic. Call Trace: allocate_segment_by_default+0xa8/0x270 [f2fs] f2fs_allocate_data_block+0x1ea/0x5c0 [f2fs] __allocate_data_block+0x306/0x480 [f2fs] f2fs_map_blocks+0x6f6/0x920 [f2fs] __get_data_block+0x4f/0xb0 [f2fs] get_data_block_dio_write+0x50/0x60 [f2fs] do_blockdev_direct_IO+0xcd5/0x21e0 __blockdev_direct_IO+0x3a/0x3c f2fs_direct_IO+0x1ff/0x4a0 [f2fs] generic_file_direct_write+0xd9/0x160 __generic_file_write_iter+0xbb/0x1e0 f2fs_file_write_iter+0xaf/0x220 [f2fs] __vfs_write+0xd0/0x130 vfs_write+0xb2/0x1b0 SyS_pwrite64+0x69/0xa0 ? vtime_user_exit+0x29/0x70 do_syscall_64+0x6e/0x160 entry_SYSCALL64_slow_path+0x25/0x25 RIP: new_curseg+0x36f/0x380 [f2fs] RSP: ffffac570393f7a8 So this patch introduces a parameter map.m_may_create to indicate that f2fs_map_blocks() is called from write or read path, which can give the right hint to let f2fs_map_blocks() trigger OPU allocation and call f2fs_balanc_fs() correctly. BTW, it disables physical address preallocation for direct IO in f2fs_preallocate_blocks, which is redundant to OPU allocation of f2fs_map_blocks. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 42 +++++++++++++++++++++++++++++------------- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 3 ++- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 70450b547034..404a39022bbe 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -957,6 +957,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) return err; } + if (direct_io && allow_outplace_dio(inode, iocb, from)) + return 0; + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; @@ -970,6 +973,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = true; if (direct_io) { map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); @@ -1028,7 +1032,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, unsigned int maxblocks = map->m_len; struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int mode = create ? ALLOC_NODE : LOOKUP_NODE; + int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE; pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; @@ -1062,7 +1066,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, } next_dnode: - if (create) + if (map->m_may_create) __do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ @@ -1099,8 +1103,8 @@ next_block: if (is_valid_data_blkaddr(sbi, blkaddr)) { /* use out-place-update for driect IO under LFS mode */ - if (test_opt(sbi, LFS) && create && - flag == F2FS_GET_BLOCK_DIO) { + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) { err = __allocate_data_block(&dn, map->m_seg_type); if (!err) set_inode_flag(inode, FI_APPEND_WRITE); @@ -1209,7 +1213,7 @@ skip: f2fs_put_dnode(&dn); - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } @@ -1235,7 +1239,7 @@ sync_out: } f2fs_put_dnode(&dn); unlock_out: - if (create) { + if (map->m_may_create) { __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } @@ -1271,7 +1275,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, - pgoff_t *next_pgofs, int seg_type) + pgoff_t *next_pgofs, int seg_type, bool may_write) { struct f2fs_map_blocks map; int err; @@ -1281,6 +1285,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, map.m_next_pgofs = next_pgofs; map.m_next_extent = NULL; map.m_seg_type = seg_type; + map.m_may_create = may_write; err = f2fs_map_blocks(inode, &map, create, flag); if (!err) { @@ -1297,16 +1302,25 @@ static int get_data_block(struct inode *inode, sector_t iblock, { return __get_data_block(inode, iblock, bh_result, create, flag, next_pgofs, - NO_CHECK_TYPE); + NO_CHECK_TYPE, create); +} + +static int get_data_block_dio_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + return __get_data_block(inode, iblock, bh_result, create, + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + true); } static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DIO, NULL, - f2fs_rw_hint_to_seg_type( - inode->i_write_hint)); + F2FS_GET_BLOCK_DIO, NULL, + f2fs_rw_hint_to_seg_type(inode->i_write_hint), + false); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -1318,7 +1332,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_BMAP, NULL, - NO_CHECK_TYPE); + NO_CHECK_TYPE, create); } static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) @@ -1525,6 +1539,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; for (; nr_pages; nr_pages--) { if (pages) { @@ -2642,7 +2657,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, - iter, get_data_block_dio, NULL, f2fs_dio_submit_bio, + iter, rw == WRITE ? get_data_block_dio_write : + get_data_block_dio, NULL, f2fs_dio_submit_bio, DIO_LOCKING | DIO_SKIP_HOLES); if (do_opu) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 039664c7e530..578a27f24356 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -602,6 +602,7 @@ struct f2fs_map_blocks { pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; + bool m_may_create; /* indicate it is from write path */ }; /* for flag in get_data_block */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 439d340dbe1a..327183000b1c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1499,7 +1499,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_map_blocks map = { .m_next_pgofs = NULL, - .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, + .m_may_create = true }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; From 45f765672c713b52b87581b5134b164e409f4004 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 13 Nov 2018 11:57:32 +0800 Subject: [PATCH 15/45] f2fs: only flush the single temp bio cache which owns the target page Previously, when f2fs finds which temp bio cache owns the target page, it will flush all the three temp bio caches, but we only need to flush one single bio cache indeed, which can help to keep bio merged. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 404a39022bbe..800543ece14a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -372,29 +372,6 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, return false; } -static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page, nid_t ino, - enum page_type type) -{ - enum page_type btype = PAGE_TYPE_OF_BIO(type); - enum temp_type temp; - struct f2fs_bio_info *io; - bool ret = false; - - for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { - io = sbi->write_io[btype] + temp; - - down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, page, ino); - up_read(&io->io_rwsem); - - /* TODO: use HOT temp only for meta pages now. */ - if (ret || btype == META) - break; - } - return ret; -} - static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { @@ -420,13 +397,19 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, nid_t ino, enum page_type type, bool force) { enum temp_type temp; - - if (!force && !has_merged_page(sbi, inode, page, ino, type)) - return; + bool ret = true; for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { + if (!force) { + enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - __f2fs_submit_merged_write(sbi, type, temp); + down_read(&io->io_rwsem); + ret = __has_merged_page(io, inode, page, ino); + up_read(&io->io_rwsem); + } + if (ret) + __f2fs_submit_merged_write(sbi, type, temp); /* TODO: use HOT temp only for meta pages now. */ if (type >= META) From e287b02c3861798dfc7c86ed4ed2eafb3db06b41 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Nov 2018 12:40:30 -0800 Subject: [PATCH 16/45] f2fs: check memory boundary by insane namelen If namelen is corrupted to have very long value, fill_dentries can copy wrong memory area. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2ef84b4590ea..9f2cc6c0a4c8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -808,6 +808,17 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); + /* check memory boundary before moving forward */ + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + if (unlikely(bit_pos > d->max)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: corrupted namelen=%d, run fsck to fix.", + __func__, le16_to_cpu(de->name_len)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + err = -EINVAL; + goto out; + } + if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; @@ -830,7 +841,6 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (readdir_ra) f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); ctx->pos = start_pos + bit_pos; } out: From 78e7965f4b3aac08f5a83223d5a1469113c00679 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 14 Nov 2018 19:34:28 +0800 Subject: [PATCH 17/45] f2fs: fix race between write_checkpoint and write_begin The following race could lead to inconsistent SIT bitmap: Task A Task B ====== ====== f2fs_write_checkpoint block_operations f2fs_lock_all down_write(node_change) down_write(node_write) ... sync ... up_write(node_change) f2fs_file_write_iter set_inode_flag(FI_NO_PREALLOC) ...... f2fs_write_begin(index=0, has inline data) prepare_write_begin __do_map_lock(AIO) => down_read(node_change) f2fs_convert_inline_page => update SIT __do_map_lock(AIO) => up_read(node_change) f2fs_flush_sit_entries <= inconsistent SIT finish write checkpoint sudden-power-off If SPO occurs after checkpoint is finished, SIT bitmap will be set incorrectly. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 800543ece14a..2760bdeb9e7b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2322,6 +2322,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, bool locked = false; struct extent_info ei = {0,0,0}; int err = 0; + int flag; /* * we already allocated all the blocks, so we don't need to get @@ -2331,9 +2332,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, !is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; + /* f2fs_lock_op avoids race between write CP and convert_inline_page */ + if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode)) + flag = F2FS_GET_BLOCK_DEFAULT; + else + flag = F2FS_GET_BLOCK_PRE_AIO; + if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + __do_map_lock(sbi, flag, true); locked = true; } restart: @@ -2371,6 +2378,7 @@ restart: f2fs_put_dnode(&dn); __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); locked = true; goto restart; } @@ -2384,7 +2392,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + __do_map_lock(sbi, flag, false); return err; } From 11b459f205cf24b78e0cd2d7154439e16877358f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 26 Nov 2018 14:20:32 -0800 Subject: [PATCH 18/45] f2fs: avoid build warn of fall_through After merging the f2fs tree, today's linux-next build (x86_64_allmodconfig) produced this warning: In file included from fs/f2fs/dir.c:11: fs/f2fs/f2fs.h: In function '__mark_inode_dirty_flag': fs/f2fs/f2fs.h:2388:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (set) ^ fs/f2fs/f2fs.h:2390:2: note: here case FI_DATA_EXIST: ^~~~ Exposed by my use of -Wimplicit-fallthrough Reported-by: Stephen Rothwell Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 578a27f24356..9abd16dce00c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2387,6 +2387,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_NEW_INODE: if (set) return; + /* fall through */ case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: From 7fa16a3cec36be588cde8d76e329a834011c78d1 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 21 Nov 2018 07:21:38 +0800 Subject: [PATCH 19/45] f2fs: fix wrong return value of f2fs_acl_create When call f2fs_acl_create_masq() failed, the caller f2fs_acl_create() should return -EIO instead of -ENOMEM, this patch makes it consistent with posix_acl_create() which has been fixed in commit beaf226b863a ("posix_acl: don't ignore return value of posix_acl_create_masq()"). Fixes: 83dfe53c185e ("f2fs: fix reference leaks in f2fs_acl_create") Signed-off-by: Tiezhu Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index fa707cdd4120..22f0d17cde43 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -352,12 +352,14 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, return PTR_ERR(p); clone = f2fs_acl_clone(p, GFP_NOFS); - if (!clone) - goto no_mem; + if (!clone) { + ret = -ENOMEM; + goto release_acl; + } ret = f2fs_acl_create_masq(clone, mode); if (ret < 0) - goto no_mem_clone; + goto release_clone; if (ret == 0) posix_acl_release(clone); @@ -371,11 +373,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, return 0; -no_mem_clone: +release_clone: posix_acl_release(clone); -no_mem: +release_acl: posix_acl_release(p); - return -ENOMEM; + return ret; } int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, From 3d704410f6a818d857df44190b7ac8df4c5387ee Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 22 Nov 2018 18:58:46 +0800 Subject: [PATCH 20/45] f2fs: read page index before freeing The function truncate_node frees the page with f2fs_put_page. However, the page index is read after that. So, the patch reads the index before freeing the page. Fixes: bf39c00a9a7f ("f2fs: drop obsolete node page when it is truncated") Cc: Signed-off-by: Pan Bian Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 904f4b97a1aa..94224be11daa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -826,6 +826,7 @@ static int truncate_node(struct dnode_of_data *dn) struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info ni; int err; + pgoff_t index; err = f2fs_get_node_info(sbi, dn->nid, &ni); if (err) @@ -845,10 +846,11 @@ static int truncate_node(struct dnode_of_data *dn) clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); + index = dn->node_page->index; f2fs_put_page(dn->node_page, 1); invalidate_mapping_pages(NODE_MAPPING(sbi), - dn->node_page->index, dn->node_page->index); + index, index); dn->node_page = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); From f6cb4680e02b734f88b719d335f4fd52a80c6dfe Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 24 Nov 2018 12:06:42 +0300 Subject: [PATCH 21/45] f2fs: make "f2fs_fault_name[]" const char * Those strings are immutable. Signed-off-by: Alexey Dobriyan Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/super.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9abd16dce00c..76737933b6be 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -67,7 +67,7 @@ struct f2fs_fault_info { unsigned int inject_type; }; -extern char *f2fs_fault_name[FAULT_MAX]; +extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 12a7cb0eaa73..e184ad4e4e90 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -38,7 +38,7 @@ static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION -char *f2fs_fault_name[FAULT_MAX] = { +const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_KVMALLOC] = "kvmalloc", [FAULT_PAGE_ALLOC] = "page alloc", From 7d155adef4ac6fe15402ad3797d595fd32c42da2 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 26 Nov 2018 13:31:41 +0530 Subject: [PATCH 22/45] f2fs: fix to allow node segment for GC by ioctl path Allow node type segments also to be GC'd via f2fs ioctl F2FS_IOC_GARBAGE_COLLECT_RANGE. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8606ebf509cb..9a60801ab1c5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -323,8 +323,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = get_max_cost(sbi, &p); if (*result != NULL_SEGNO) { - if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && - get_valid_blocks(sbi, *result, false) && + if (get_valid_blocks(sbi, *result, false) && !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) p.min_segno = *result; goto out; From 1bb27a8acc07ec2233e6bab8ad58765027526a03 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 26 Nov 2018 13:31:42 +0530 Subject: [PATCH 23/45] f2fs: adjust trace print in f2fs_get_victim() to cover all paths Adjust the trace print in f2fs_get_victim() to cover GC done by F2FS_IOC_GARBAGE_COLLECT_RANGE. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9a60801ab1c5..71462f2e47d4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -420,11 +420,12 @@ got_result: set_bit(secno, dirty_i->victim_secmap); } + } +out: + if (p.min_segno != NULL_SEGNO) trace_f2fs_get_victim(sbi->sb, type, gc_type, &p, sbi->cur_victim_sec, prefree_segments(sbi), free_segments(sbi)); - } -out: mutex_unlock(&dirty_i->seglist_lock); return (p.min_segno == NULL_SEGNO) ? 0 : 1; From 66b7f923b82c7de5ecf9a8a035565787c54c3550 Mon Sep 17 00:00:00 2001 From: Jia Zhu Date: Tue, 27 Nov 2018 02:32:32 +0800 Subject: [PATCH 24/45] f2fs: fix to update new block address correctly for OPU Previously, we allocated a new block address for OPU mode in direct_IO. But the new address couldn't be assigned to @map->m_pblk correctly. This patch fix it. Cc: Fixes: 511f52d02f05 ("f2fs: allow out-place-update for direct IO in LFS mode") Signed-off-by: Jia Zhu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2760bdeb9e7b..3ed6bbd0f4b4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1089,8 +1089,10 @@ next_block: if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { err = __allocate_data_block(&dn, map->m_seg_type); - if (!err) + if (!err) { + blkaddr = dn.data_blkaddr; set_inode_flag(inode, FI_APPEND_WRITE); + } } } else { if (create) { From 2f3fdac27c1af3d99aeb0379f4790e76bd90faa3 Mon Sep 17 00:00:00 2001 From: Jia Zhu Date: Tue, 20 Nov 2018 04:29:35 +0800 Subject: [PATCH 25/45] f2fs: fix m_may_create to make OPU DIO write correctly Previously, we added a parameter @map.m_may_create to trigger OPU allocation and call f2fs_balance_fs() correctly. But in get_more_blocks(), @create has been overwritten by below code. So the function f2fs_map_blocks() will not allocate new block address but directly go out. Meanwile,there are several functions calling f2fs_map_blocks() directly and @map.m_may_create not initialized. CODE: create = dio->op == REQ_OP_WRITE; if (dio->flags & DIO_SKIP_HOLES) { if (fs_startblk <= ((i_size_read(dio->inode) - 1) >> i_blkbits)) create = 0; } This patch fixes it. Signed-off-by: Jia Zhu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++++ fs/f2fs/file.c | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3ed6bbd0f4b4..60df850f4911 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1035,6 +1035,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, end = pgofs + maxblocks; if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO && + map->m_may_create) + goto next_dnode; + map->m_pblk = ei.blk + pgofs - ei.fofs; map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); map->m_flags = F2FS_MAP_MAPPED; @@ -1246,6 +1250,7 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) map.m_next_pgofs = NULL; map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; last_lblk = F2FS_BLK_ALIGN(pos + len); while (map.m_lblk < last_lblk) { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 327183000b1c..ff82350a2c55 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2201,7 +2201,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_extent = NULL, - .m_seg_type = NO_CHECK_TYPE }; + .m_seg_type = NO_CHECK_TYPE , + .m_may_create = false }; struct extent_info ei = {0, 0, 0}; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; @@ -2935,6 +2936,7 @@ int f2fs_precache_extents(struct inode *inode) map.m_next_pgofs = NULL; map.m_next_extent = &m_next_extent; map.m_seg_type = NO_CHECK_TYPE; + map.m_may_create = false; end = F2FS_I_SB(inode)->max_file_blocks; while (map.m_lblk < end) { From 06f722a01ed7128767b0d7de5c1aaf0a8489a216 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 27 Nov 2018 23:28:37 -0800 Subject: [PATCH 26/45] f2fs: avoid frequent costly fsck triggers If we want to re-enable nat_bits, we rely on fsck which requires full scan of directory tree. Let's do that by regular fsck or unclean shutdown. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 76737933b6be..170a916cc5c9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1621,7 +1621,11 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { unsigned long flags; - set_sbi_flag(sbi, SBI_NEED_FSCK); + /* + * In order to re-enable nat_bits we need to call fsck.f2fs by + * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost, + * so let's rely on regular fsck or unclean shutdown. + */ if (lock) spin_lock_irqsave(&sbi->cp_lock, flags); From c40d5120400961118a4c33c934a283463107712c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Nov 2018 13:26:03 -0800 Subject: [PATCH 27/45] f2fs: add an ioctl() to explicitly trigger fsck later This adds an option in ioctl(F2FS_IOC_SHUTDOWN) in order to trigger fsck by setting a NEED_FSCK flag. Generally, shutdown is used for the test to validate filesystem consistency, and setting NEED_FSCK flag can be used for Android to trigger fsck.f2fs at boot time explicitly so that we could measure the elapsed time as well as force filesystem check. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 170a916cc5c9..1ebd115a9359 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -417,6 +417,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ #define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ #define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ +#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ff82350a2c55..ca9bdbb8651b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1966,6 +1966,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) f2fs_stop_checkpoint(sbi, false); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; + case F2FS_GOING_DOWN_NEED_FSCK: + set_sbi_flag(sbi, SBI_NEED_FSCK); + /* do checkpoint only */ + ret = f2fs_sync_fs(sb, 1); + if (ret) + goto out; + break; default: ret = -EINVAL; goto out; From de7b2c27b7fdf6bb5ba727af0407bc5676433777 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 4 Dec 2018 22:59:21 +0800 Subject: [PATCH 28/45] f2fs: clear PG_writeback if IPU failed If IPU failed, nothing is commited, we should end page writeback. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 60df850f4911..217f977734ab 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1860,6 +1860,8 @@ got_it: if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); err = f2fs_inplace_write_data(fio); + if (err && PageWriteback(page)) + end_page_writeback(page); trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); return err; From b7da8fe15f9c6e31560b4d86401a3a0318adec56 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Dec 2018 18:12:30 +0800 Subject: [PATCH 29/45] f2fs: fix to reorder set_page_dirty and wait_on_page_writeback This patch reorders flow from - update page - set_page_dirty - wait_on_page_writeback to - wait_on_page_writeback - update page - set_page_dirty The reason is: - set_page_dirty will increase reference of dirty page, the reference should be cleared before wait_on_page_writeback to keep its consistency. - some devices need stable page during page writebacking, so we should not change page's data. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++++--- fs/f2fs/gc.c | 5 +++-- fs/f2fs/node.c | 5 +++-- fs/f2fs/segment.c | 3 ++- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3a25cf22d732..4f02461f348c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1290,11 +1290,12 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi, struct page *page = f2fs_grab_meta_page(sbi, blk_addr); int err; - memcpy(page_address(page), src, PAGE_SIZE); - set_page_dirty(page); - f2fs_wait_on_page_writeback(page, META, true); f2fs_bug_on(sbi, PageWriteback(page)); + + memcpy(page_address(page), src, PAGE_SIZE); + + set_page_dirty(page); if (unlikely(!clear_page_dirty_for_io(page))) f2fs_bug_on(sbi, 1); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 71462f2e47d4..666518eb7293 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -829,8 +829,8 @@ static int move_data_block(struct inode *inode, block_t bidx, } write_page: - set_page_dirty(fio.encrypted_page); f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true); + set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) dec_page_count(fio.sbi, F2FS_DIRTY_META); @@ -924,8 +924,9 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, bool is_dirty = PageDirty(page); retry: - set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA, true); + + set_page_dirty(page); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 94224be11daa..c8197bd8771a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1600,10 +1600,11 @@ int f2fs_move_node_page(struct page *node_page, int gc_type) .for_reclaim = 0, }; - set_page_dirty(node_page); f2fs_wait_on_page_writeback(node_page, NODE, true); - f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); + + set_page_dirty(node_page); + if (!clear_page_dirty_for_io(node_page)) { err = -EAGAIN; goto out_page; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 204d31e58967..e2e971e89b2d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -387,8 +387,9 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) if (page->mapping == inode->i_mapping) { trace_f2fs_commit_inmem_page(page, INMEM); - set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA, true); + + set_page_dirty(page); if (clear_page_dirty_for_io(page)) { inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); From bb46621ef2a78f7de1f5b7b802de8c5f41329463 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 13 Dec 2018 17:43:11 +0800 Subject: [PATCH 30/45] f2fs: remove redundant comment of unused wio_mutex Commit 089842de ("f2fs: remove codes of unused wio_mutex") removes codes of unused wio_mutex, but missing the comment, so delete it. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1ebd115a9359..de89e89f62c5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1175,7 +1175,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ - /* bio ordering for NODE/DATA */ /* keep migration IO order for LFS mode */ struct rw_semaphore io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ From d7b2656f99a15872d6e4ab95e28637167353aca1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 13 Dec 2018 18:38:33 -0800 Subject: [PATCH 31/45] f2fs: use kvmalloc, if kmalloc is failed One report says memalloc failure during mount. (unwind_backtrace) from [] (show_stack+0x10/0x14) (show_stack) from [] (dump_stack+0x8c/0xa0) (dump_stack) from [] (warn_alloc+0xc4/0x160) (warn_alloc) from [] (__alloc_pages_nodemask+0x3f4/0x10d0) (__alloc_pages_nodemask) from [] (kmalloc_order_trace+0x2c/0x120) (kmalloc_order_trace) from [] (build_node_manager+0x35c/0x688) (build_node_manager) from [] (f2fs_fill_super+0xf0c/0x16cc) (f2fs_fill_super) from [] (mount_bdev+0x15c/0x188) (mount_bdev) from [] (f2fs_mount+0x18/0x20) (f2fs_mount) from [] (mount_fs+0x158/0x19c) (mount_fs) from [] (vfs_kern_mount+0x78/0x134) (vfs_kern_mount) from [] (do_mount+0x474/0xca4) (do_mount) from [] (SyS_mount+0x94/0xbc) (SyS_mount) from [] (ret_fast_syscall+0x0/0x48) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 6 ++-- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 2 +- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 10 +++++-- fs/f2fs/gc.c | 4 +-- fs/f2fs/inline.c | 4 +-- fs/f2fs/namei.c | 2 +- fs/f2fs/node.c | 10 +++---- fs/f2fs/segment.c | 36 +++++++++++------------ fs/f2fs/super.c | 68 ++++++++++++++++++++++---------------------- 11 files changed, 76 insertions(+), 70 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 22f0d17cde43..63e599524085 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi, return (void *)f2fs_acl; fail: - kfree(f2fs_acl); + kvfree(f2fs_acl); return ERR_PTR(-EINVAL); } @@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, acl = NULL; else acl = ERR_PTR(retval); - kfree(value); + kvfree(value); return acl; } @@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); - kfree(value); + kvfree(value); if (!error) set_cached_acl(inode, type, acl); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4f02461f348c..1d8283370861 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -911,7 +911,7 @@ free_fail_no_cp: f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); fail_no_cp: - kfree(sbi->ckpt); + kvfree(sbi->ckpt); return -EINVAL; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 217f977734ab..8934147cb3a2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2573,7 +2573,7 @@ static void f2fs_dio_end_io(struct bio *bio) bio->bi_private = dio->orig_private; bio->bi_end_io = dio->orig_end_io; - kfree(dio); + kvfree(dio); bio_endio(bio); } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 1f1230f690ec..11d4448e8e09 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -503,7 +503,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) list_del(&si->stat_list); mutex_unlock(&f2fs_stat_mutex); - kfree(si); + kvfree(si); } int __init f2fs_create_root_stats(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index de89e89f62c5..422d5a975a2f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1630,7 +1630,7 @@ static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) if (lock) spin_lock_irqsave(&sbi->cp_lock, flags); __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); - kfree(NM_I(sbi)->nat_bits); + kvfree(NM_I(sbi)->nat_bits); NM_I(sbi)->nat_bits = NULL; if (lock) spin_unlock_irqrestore(&sbi->cp_lock, flags); @@ -2704,12 +2704,18 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { + void *ret; + if (time_to_inject(sbi, FAULT_KMALLOC)) { f2fs_show_injection_info(FAULT_KMALLOC); return NULL; } - return kmalloc(size, flags); + ret = kmalloc(size, flags); + if (ret) + return ret; + + return kvmalloc(size, flags); } static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 666518eb7293..aed7084e8443 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -142,7 +142,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { err = PTR_ERR(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } out: @@ -155,7 +155,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) if (!gc_th) return; kthread_stop(gc_th->f2fs_gc_task); - kfree(gc_th); + kvfree(gc_th); sbi->gc_thread = NULL; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index cb31a719b048..3544c73a1290 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -501,7 +501,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); - kfree(backup_dentry); + kvfree(backup_dentry); return 0; recover: lock_page(ipage); @@ -512,7 +512,7 @@ recover: set_page_dirty(ipage); f2fs_put_page(ipage, 1); - kfree(backup_dentry); + kvfree(backup_dentry); return err; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2d5626939893..62d9829f3a6a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -635,7 +635,7 @@ out_f2fs_handle_failed_inode: f2fs_handle_failed_inode(inode); out_free_encrypted_link: if (disk_link.name != (unsigned char *)symname) - kfree(disk_link.name); + kvfree(disk_link.name); return err; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c8197bd8771a..b4f0cdcb9b19 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -3118,17 +3118,17 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) for (i = 0; i < nm_i->nat_blocks; i++) kvfree(nm_i->free_nid_bitmap[i]); - kfree(nm_i->free_nid_bitmap); + kvfree(nm_i->free_nid_bitmap); } kvfree(nm_i->free_nid_count); - kfree(nm_i->nat_bitmap); - kfree(nm_i->nat_bits); + kvfree(nm_i->nat_bitmap); + kvfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS - kfree(nm_i->nat_bitmap_mir); + kvfree(nm_i->nat_bitmap_mir); #endif sbi->nm_info = NULL; - kfree(nm_i); + kvfree(nm_i); } int __init f2fs_create_node_manager_caches(void) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e2e971e89b2d..de2c91964330 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -706,7 +706,7 @@ init_thread: "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { err = PTR_ERR(fcc->f2fs_issue_flush); - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; return err; } @@ -725,7 +725,7 @@ void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) kthread_stop(flush_thread); } if (free) { - kfree(fcc); + kvfree(fcc); SM_I(sbi)->fcc_info = NULL; } } @@ -2013,7 +2013,7 @@ init_thread: "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; return err; } @@ -2030,7 +2030,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) f2fs_stop_discard_thread(sbi); - kfree(dcc); + kvfree(dcc); SM_I(sbi)->dcc_info = NULL; } @@ -4317,7 +4317,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; - kfree(dirty_i); + kvfree(dirty_i); } static void destroy_curseg(struct f2fs_sb_info *sbi) @@ -4329,10 +4329,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi) return; SM_I(sbi)->curseg_array = NULL; for (i = 0; i < NR_CURSEG_TYPE; i++) { - kfree(array[i].sum_blk); - kfree(array[i].journal); + kvfree(array[i].sum_blk); + kvfree(array[i].journal); } - kfree(array); + kvfree(array); } static void destroy_free_segmap(struct f2fs_sb_info *sbi) @@ -4343,7 +4343,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = NULL; kvfree(free_i->free_segmap); kvfree(free_i->free_secmap); - kfree(free_i); + kvfree(free_i); } static void destroy_sit_info(struct f2fs_sb_info *sbi) @@ -4356,26 +4356,26 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) if (sit_i->sentries) { for (start = 0; start < MAIN_SEGS(sbi); start++) { - kfree(sit_i->sentries[start].cur_valid_map); + kvfree(sit_i->sentries[start].cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sentries[start].cur_valid_map_mir); + kvfree(sit_i->sentries[start].cur_valid_map_mir); #endif - kfree(sit_i->sentries[start].ckpt_valid_map); - kfree(sit_i->sentries[start].discard_map); + kvfree(sit_i->sentries[start].ckpt_valid_map); + kvfree(sit_i->sentries[start].discard_map); } } - kfree(sit_i->tmp_map); + kvfree(sit_i->tmp_map); kvfree(sit_i->sentries); kvfree(sit_i->sec_entries); kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; - kfree(sit_i->sit_bitmap); + kvfree(sit_i->sit_bitmap); #ifdef CONFIG_F2FS_CHECK_FS - kfree(sit_i->sit_bitmap_mir); + kvfree(sit_i->sit_bitmap_mir); #endif - kfree(sit_i); + kvfree(sit_i); } void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) @@ -4391,7 +4391,7 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi) destroy_free_segmap(sbi); destroy_sit_info(sbi); sbi->sm_info = NULL; - kfree(sm_info); + kvfree(sm_info); } int __init f2fs_create_segment_manager_caches(void) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e184ad4e4e90..b79677639108 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -289,7 +289,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, set_opt(sbi, QUOTA); return 0; errout: - kfree(qname); + kvfree(qname); return ret; } @@ -302,7 +302,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) " when quota turned on"); return -EINVAL; } - kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -399,10 +399,10 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, BG_GC); set_opt(sbi, FORCE_FG_GC); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); @@ -570,7 +570,7 @@ static int parse_options(struct super_block *sb, char *options) f2fs_msg(sb, KERN_WARNING, "adaptive mode is not allowed with " "zoned block device feature"); - kfree(name); + kvfree(name); return -EINVAL; } set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); @@ -578,10 +578,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "lfs", 3)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_io_size_bits: if (args->from && match_int(args, &arg)) @@ -714,10 +714,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "fs-based", 8)) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_alloc: name = match_strdup(&args[0]); @@ -731,10 +731,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "reuse", 5)) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_fsync: name = match_strdup(&args[0]); @@ -751,10 +751,10 @@ static int parse_options(struct super_block *sb, char *options) F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_NOBARRIER; } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; case Opt_test_dummy_encryption: #ifdef CONFIG_F2FS_FS_ENCRYPTION @@ -783,10 +783,10 @@ static int parse_options(struct super_block *sb, char *options) !strncmp(name, "disable", 7)) { set_opt(sbi, DISABLE_CHECKPOINT); } else { - kfree(name); + kvfree(name); return -EINVAL; } - kfree(name); + kvfree(name); break; default: f2fs_msg(sb, KERN_ERR, @@ -1017,10 +1017,10 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) for (i = 0; i < sbi->s_ndevs; i++) { blkdev_put(FDEV(i).bdev, FMODE_EXCL); #ifdef CONFIG_BLK_DEV_ZONED - kfree(FDEV(i).blkz_type); + kvfree(FDEV(i).blkz_type); #endif } - kfree(sbi->devs); + kvfree(sbi->devs); } static void f2fs_put_super(struct super_block *sb) @@ -1084,25 +1084,25 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_node_manager(sbi); f2fs_destroy_segment_manager(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); f2fs_unregister_sysfs(sbi); sb->s_fs_info = NULL; if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi->raw_super); + kvfree(sbi->raw_super); destroy_device_list(sbi); mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); - kfree(sbi); + kvfree(sbi->write_io[i]); + kvfree(sbi); } int f2fs_sync_fs(struct super_block *sb, int sync) @@ -1531,7 +1531,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) GFP_KERNEL); if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kfree(org_mount_opt.s_qf_names[j]); + kvfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { @@ -1651,7 +1651,7 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kfree(org_mount_opt.s_qf_names[i]); + kvfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | @@ -1672,7 +1672,7 @@ restore_opts: #ifdef CONFIG_QUOTA F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif @@ -2800,7 +2800,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) } } - kfree(zones); + kvfree(zones); return err; } @@ -2860,7 +2860,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, /* No valid superblock */ if (!*raw_super) - kfree(super); + kvfree(super); else err = 0; @@ -3369,7 +3369,7 @@ skip_recovery: if (err) goto free_meta; } - kfree(options); + kvfree(options); /* recover broken superblock */ if (recovery) { @@ -3418,7 +3418,7 @@ free_sm: f2fs_destroy_segment_manager(sbi); free_devices: destroy_device_list(sbi); - kfree(sbi->ckpt); + kvfree(sbi->ckpt); free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); @@ -3428,19 +3428,19 @@ free_percpu: destroy_percpu_info(sbi); free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) - kfree(sbi->write_io[i]); + kvfree(sbi->write_io[i]); free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(F2FS_OPTION(sbi).s_qf_names[i]); + kvfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif - kfree(options); + kvfree(options); free_sb_buf: - kfree(raw_super); + kvfree(raw_super); free_sbi: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kfree(sbi); + kvfree(sbi); /* give only one another chance */ if (retry) { From e974cb4b05aac0ce96e0674336f53e02a242b07a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 13 Dec 2018 16:53:57 -0800 Subject: [PATCH 32/45] f2fs: correct wrong spelling, issing_* Let's use "queued" instead of "issuing". Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 4 ++-- fs/f2fs/f2fs.h | 10 +++++----- fs/f2fs/segment.c | 26 +++++++++++++------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 11d4448e8e09..ebcc121920ba 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -64,7 +64,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); si->nr_flushing = - atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + atomic_read(&SM_I(sbi)->fcc_info->queued_flush); si->flush_list_empty = llist_empty(&SM_I(sbi)->fcc_info->issue_list); } @@ -72,7 +72,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nr_discarded = atomic_read(&SM_I(sbi)->dcc_info->issued_discard); si->nr_discarding = - atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + atomic_read(&SM_I(sbi)->dcc_info->queued_discard); si->nr_discard_cmd = atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 422d5a975a2f..b4792fb1c624 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -285,7 +285,7 @@ struct discard_cmd { struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ unsigned char state; /* state */ - unsigned char issuing; /* issuing discard */ + unsigned char queued; /* queued discard */ int error; /* bio error */ spinlock_t lock; /* for state/bio_ref updating */ unsigned short bio_ref; /* bio reference count */ @@ -327,7 +327,7 @@ struct discard_cmd_control { unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ - atomic_t issing_discard; /* # of issing discard */ + atomic_t queued_discard; /* # of queued discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ struct rb_root_cached root; /* root of discard rb-tree */ bool rbtree_check; /* config for consistence check */ @@ -892,7 +892,7 @@ struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ atomic_t issued_flush; /* # of issued flushes */ - atomic_t issing_flush; /* # of issing flushes */ + atomic_t queued_flush; /* # of queued flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -2166,8 +2166,8 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) get_pages(sbi, F2FS_WB_CP_DATA) || get_pages(sbi, F2FS_DIO_READ) || get_pages(sbi, F2FS_DIO_WRITE) || - atomic_read(&SM_I(sbi)->dcc_info->issing_discard) || - atomic_read(&SM_I(sbi)->fcc_info->issing_flush)) + atomic_read(&SM_I(sbi)->dcc_info->queued_discard) || + atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) return false; return f2fs_time_over(sbi, type); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index de2c91964330..0f7a92be678a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -621,16 +621,16 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) return 0; if (!test_opt(sbi, FLUSH_MERGE)) { - atomic_inc(&fcc->issing_flush); + atomic_inc(&fcc->queued_flush); ret = submit_flush_wait(sbi, ino); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; } - if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) { + if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) { ret = submit_flush_wait(sbi, ino); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); atomic_inc(&fcc->issued_flush); return ret; @@ -649,14 +649,14 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) if (fcc->f2fs_issue_flush) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct llist_node *list; list = llist_del_all(&fcc->issue_list); if (!list) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); } else { struct flush_cmd *tmp, *next; @@ -665,7 +665,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) llist_for_each_entry_safe(tmp, next, list, llnode) { if (tmp == &cmd) { cmd.ret = ret; - atomic_dec(&fcc->issing_flush); + atomic_dec(&fcc->queued_flush); continue; } tmp->ret = ret; @@ -694,7 +694,7 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) if (!fcc) return -ENOMEM; atomic_set(&fcc->issued_flush, 0); - atomic_set(&fcc->issing_flush, 0); + atomic_set(&fcc->queued_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; @@ -910,7 +910,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, dc->len = len; dc->ref = 0; dc->state = D_PREP; - dc->issuing = 0; + dc->queued = 0; dc->error = 0; init_completion(&dc->wait); list_add_tail(&dc->list, pend_list); @@ -943,7 +943,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_sub(dc->issuing, &dcc->issing_discard); + atomic_sub(dc->queued, &dcc->queued_discard); list_del(&dc->list); rb_erase_cached(&dc->rb_node, &dcc->root); @@ -1146,8 +1146,8 @@ submit: dc->bio_ref++; spin_unlock_irqrestore(&dc->lock, flags); - atomic_inc(&dcc->issing_discard); - dc->issuing++; + atomic_inc(&dcc->queued_discard); + dc->queued++; list_move_tail(&dc->list, wait_list); /* sanity check on discard range */ @@ -1997,7 +1997,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); - atomic_set(&dcc->issing_discard, 0); + atomic_set(&dcc->queued_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; From 421b01244d46b3a12cdd556c9feda2b529a94a0b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 13 Dec 2018 20:50:51 -0800 Subject: [PATCH 33/45] f2fs: flush stale issued discard candidates Sometimes, I could observe # of issuing_discard to be 1 which blocks background jobs due to is_idle()=false. The only way to get out of it was to trigger gc_urgent. This patch avoids that by checking any candidates as done in the list. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0f7a92be678a..d9a3345a244a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1652,6 +1652,10 @@ static int issue_discard_thread(void *data) if (dcc->discard_wake) dcc->discard_wake = 0; + /* clean up pending candidates before going to sleep */ + if (atomic_read(&dcc->queued_discard)) + __wait_all_discard_cmd(sbi, NULL); + if (try_to_freeze()) continue; if (f2fs_readonly(sbi->sb)) From 0204c10ec488b94bb7c502b2f2089561f3f52454 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Dec 2018 18:12:32 +0800 Subject: [PATCH 34/45] f2fs: clean up checkpoint flow This patch cleans up checkpoint flow a bit: - remove unneeded circulation of flushing meta pages. - don't flush nat_bits pages in prior to other checkpoint pages. - add bug_on to check remained meta pages after flushing. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1d8283370861..5401ea5e491d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1329,11 +1329,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) int err; /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - break; - } + f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* * modify checkpoint @@ -1406,14 +1404,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) for (i = 0; i < nm_i->nat_bits_blocks; i++) f2fs_update_meta_page(sbi, nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), blk + i); - - /* Flush all the NAT BITS pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) { - f2fs_sync_meta_pages(sbi, META, LONG_MAX, - FS_CP_META_IO); - if (unlikely(f2fs_cp_error(sbi))) - break; - } } /* write out checkpoint buffer at block 0 */ @@ -1449,6 +1439,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) && + !f2fs_cp_error(sbi)); /* wait for previous submitted meta pages writeback */ f2fs_wait_on_all_pages_writeback(sbi); From 6ec0ba6375b99d4d94013db82b01f348630ccfec Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 18 Dec 2018 16:39:24 +0530 Subject: [PATCH 35/45] f2fs: fix sbi->extent_list corruption issue When there is a failure in f2fs_fill_super() after/during the recovery of fsync'd nodes, it frees the current sbi and retries again. This time the mount is successful, but the files that got recovered before retry, still holds the extent tree, whose extent nodes list is corrupted since sbi and sbi->extent_list is freed up. The list_del corruption issue is observed when the file system is getting unmounted and when those recoverd files extent node is being freed up in the below context. list_del corruption. prev->next should be fffffff1e1ef5480, but was (null) <...> kernel BUG at kernel/msm-4.14/lib/list_debug.c:53! lr : __list_del_entry_valid+0x94/0xb4 pc : __list_del_entry_valid+0x94/0xb4 <...> Call trace: __list_del_entry_valid+0x94/0xb4 __release_extent_node+0xb0/0x114 __free_extent_tree+0x58/0x7c f2fs_shrink_extent_tree+0xdc/0x3b0 f2fs_leave_shrinker+0x28/0x7c f2fs_put_super+0xfc/0x1e0 generic_shutdown_super+0x70/0xf4 kill_block_super+0x2c/0x5c kill_f2fs_super+0x44/0x50 deactivate_locked_super+0x60/0x8c deactivate_super+0x68/0x74 cleanup_mnt+0x40/0x78 __cleanup_mnt+0x1c/0x28 task_work_run+0x48/0xd0 do_notify_resume+0x678/0xe98 work_pending+0x8/0x14 Fix this by not creating extents for those recovered files if shrinker is not registered yet. Once mount is successful and shrinker is registered, those files can have extents again. Signed-off-by: Sahitya Tummala Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 11 ++++++++++- fs/f2fs/shrinker.c | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b4792fb1c624..35a4b8ea7c22 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2694,10 +2694,19 @@ static inline bool is_dot_dotdot(const struct qstr *str) static inline bool f2fs_may_extent_tree(struct inode *inode) { - if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, EXTENT_CACHE) || is_inode_flag_set(inode, FI_NO_EXTENT)) return false; + /* + * for recovered files during mount do not create extents + * if shrinker is not registered. + */ + if (list_empty(&sbi->s_list)) + return false; + return S_ISREG(inode->i_mode); } diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 9e13db994fdf..a467aca29cfe 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -135,6 +135,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); spin_lock(&f2fs_list_lock); - list_del(&sbi->s_list); + list_del_init(&sbi->s_list); spin_unlock(&f2fs_list_lock); } From 3ff88a44b8c5abbc14391d4f32bf34897bcf988e Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Tue, 18 Dec 2018 17:32:23 +0800 Subject: [PATCH 36/45] f2fs: fix block address for __check_sit_bitmap Should use lstart (logical start address) instead of start (in dev) here. This fixes a bug in multi-device scenarios. Signed-off-by: Qiuyang Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d9a3345a244a..009971ea8f08 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1151,7 +1151,7 @@ submit: list_move_tail(&dc->list, wait_list); /* sanity check on discard range */ - __check_sit_bitmap(sbi, start, start + len); + __check_sit_bitmap(sbi, lstart, lstart + len); bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; From 5648f87d9ca7151e3e58230411e9a884379c5027 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Dec 2018 19:20:16 +0800 Subject: [PATCH 37/45] f2fs: clean up structure extent_node The union in struct extent_node wass only to indicate below fields struct rb_node rb_node; union { struct { unsigned int fofs; unsigned int len; ... ... can be parsed as fields in struct rb_entry, but they were never be used explicitly before, so let's remove them for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 35a4b8ea7c22..3ffb423ac04b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -559,16 +559,8 @@ struct extent_info { }; struct extent_node { - struct rb_node rb_node; - union { - struct { - unsigned int fofs; - unsigned int len; - u32 blk; - }; - struct extent_info ei; /* extent info */ - - }; + struct rb_node rb_node; /* rb node located in rb-tree */ + struct extent_info ei; /* extent info */ struct list_head list; /* node in global extent list of sbi */ struct extent_tree *et; /* extent tree pointer */ }; From d81bb3e365a34558dc4d526c80dd42f7587266fd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Dec 2018 19:20:17 +0800 Subject: [PATCH 38/45] f2fs: fix to dirty inode synchronously If user change inode's i_flags via ioctl, let's add it into global dirty list, so that checkpoint can guarantee its persistence before fsync, it can make checkpoint keeping strong consistency. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ca9bdbb8651b..7bf28be811ab 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1685,7 +1685,7 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); - f2fs_mark_inode_dirty_sync(inode, false); + f2fs_mark_inode_dirty_sync(inode, true); return 0; } From 8b15da81e6c3677a1d485d831a64a278f412f03b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Dec 2018 17:08:26 -0800 Subject: [PATCH 39/45] f2fs: fix missing unlock(sbi->gc_mutex) This fixes missing unlock call. Cc: Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b79677639108..cec7ef27b389 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1457,19 +1457,16 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) sbi->sb->s_flags |= SB_ACTIVE; - mutex_lock(&sbi->gc_mutex); f2fs_update_time(sbi, DISABLE_TIME); while (!f2fs_time_over(sbi, DISABLE_TIME)) { + mutex_lock(&sbi->gc_mutex); err = f2fs_gc(sbi, true, false, NULL_SEGNO); if (err == -ENODATA) break; - if (err && err != -EAGAIN) { - mutex_unlock(&sbi->gc_mutex); + if (err && err != -EAGAIN) return err; - } } - mutex_unlock(&sbi->gc_mutex); err = sync_filesystem(sbi->sb); if (err) From 532960574600738ef18ba35aced69e3a3853af4a Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 22 Dec 2018 11:22:26 +0100 Subject: [PATCH 40/45] f2fs: fix validation of the block count in sanity_check_raw_super Treat "block_count" from struct f2fs_super_block as 64-bit little endian value in sanity_check_raw_super() because struct f2fs_super_block declares "block_count" as "__le64". This fixes a bug where the superblock validation fails on big endian devices with the following error: F2FS-fs (sda1): Wrong segment_count / block_count (61439 > 0) F2FS-fs (sda1): Can't find valid F2FS filesystem in 1th superblock F2FS-fs (sda1): Wrong segment_count / block_count (61439 > 0) F2FS-fs (sda1): Can't find valid F2FS filesystem in 2th superblock As result of this the partition cannot be mounted. With this patch applied the superblock validation works fine and the partition can be mounted again: F2FS-fs (sda1): Mounted with checkpoint version = 7c84 My little endian x86-64 hardware was able to mount the partition without this fix. To confirm that mounting f2fs filesystems works on big endian machines again I tested this on a 32-bit MIPS big endian (lantiq) device. Fixes: 0cfe75c5b01199 ("f2fs: enhance sanity_check_raw_super() to avoid potential overflows") Cc: stable@vger.kernel.org Signed-off-by: Martin Blumenstingl Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cec7ef27b389..9b17e367aa8c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2493,10 +2493,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } - if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) { + if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { f2fs_msg(sb, KERN_INFO, - "Wrong segment_count / block_count (%u > %u)", - segment_count, le32_to_cpu(raw_super->block_count)); + "Wrong segment_count / block_count (%u > %llu)", + segment_count, le64_to_cpu(raw_super->block_count)); return 1; } From 02b11f78d12ca52094e07ce5611be0e1879794b1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Dec 2018 17:43:42 +0800 Subject: [PATCH 41/45] f2fs: check PageWriteback flag for ordered case For all ordered cases in f2fs_wait_on_page_writeback(), we need to check PageWriteback status, so let's clean up to relocate the check into f2fs_wait_on_page_writeback(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 +++----- fs/f2fs/data.c | 9 ++++----- fs/f2fs/dir.c | 8 ++++---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 6 +++--- fs/f2fs/gc.c | 10 +++++----- fs/f2fs/inline.c | 16 ++++++++-------- fs/f2fs/inode.c | 4 ++-- fs/f2fs/node.c | 19 ++++++++----------- fs/f2fs/node.h | 2 +- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 14 ++++++++------ fs/f2fs/xattr.c | 4 ++-- 13 files changed, 50 insertions(+), 54 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5401ea5e491d..f955cd3e0677 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -44,7 +44,7 @@ repeat: cond_resched(); goto repeat; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); if (!PageUptodate(page)) SetPageUptodate(page); return page; @@ -370,9 +370,8 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(page, META, true); + f2fs_wait_on_page_writeback(page, META, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -1290,8 +1289,7 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi, struct page *page = f2fs_grab_meta_page(sbi, blk_addr); int err; - f2fs_wait_on_page_writeback(page, META, true); - f2fs_bug_on(sbi, PageWriteback(page)); + f2fs_wait_on_page_writeback(page, META, true, true); memcpy(page_address(page), src, PAGE_SIZE); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8934147cb3a2..9c2d8fcf6064 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -626,7 +626,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn) */ void f2fs_set_data_blkaddr(struct dnode_of_data *dn) { - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); __set_data_blkaddr(dn); if (set_page_dirty(dn->node_page)) dn->node_changed = true; @@ -656,7 +656,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); - f2fs_wait_on_page_writeback(dn->node_page, NODE, true); + f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); for (; count > 0; dn->ofs_in_node++) { block_t blkaddr = datablock_addr(dn->inode, @@ -2149,12 +2149,11 @@ continue_unlock: if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, - DATA, true); + DATA, true, true); else goto continue_unlock; } - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -2472,7 +2471,7 @@ repeat: } } - f2fs_wait_on_page_writeback(page, DATA, false); + f2fs_wait_on_page_writeback(page, DATA, false, true); if (len == PAGE_SIZE || PageUptodate(page)) return 0; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9f2cc6c0a4c8..7ff9e993008e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -293,7 +293,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, { enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, type, true); + f2fs_wait_on_page_writeback(page, type, true, true); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode->i_mode); set_page_dirty(page); @@ -307,7 +307,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_inode *ri; - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); @@ -550,7 +550,7 @@ start: ++level; goto start; add_dentry: - f2fs_wait_on_page_writeback(dentry_page, DATA, true); + f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); if (inode) { down_write(&F2FS_I(inode)->i_sem); @@ -705,7 +705,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, return f2fs_delete_inline_entry(dentry, page, dir, inode); lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3ffb423ac04b..86536e9f019c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3038,7 +3038,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered); + enum page_type type, bool ordered, bool locked); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, block_t len); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7bf28be811ab..bba56b39dcc5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -82,7 +82,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) } /* fill the page */ - f2fs_wait_on_page_writeback(page, DATA, false); + f2fs_wait_on_page_writeback(page, DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -578,7 +578,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, offset, PAGE_SIZE - offset); /* An encrypted inode should have a key and truncate the last page. */ @@ -895,7 +895,7 @@ static int fill_zero(struct inode *inode, pgoff_t index, if (IS_ERR(page)) return PTR_ERR(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); zero_user(page, start, len); set_page_dirty(page); f2fs_put_page(page, 1); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index aed7084e8443..195cf0f9d9ef 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -679,7 +679,7 @@ got_it: * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -768,7 +768,7 @@ static int move_data_block(struct inode *inode, block_t bidx, * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -829,7 +829,7 @@ static int move_data_block(struct inode *inode, block_t bidx, } write_page: - f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true); + f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) dec_page_count(fio.sbi, F2FS_DIRTY_META); @@ -838,7 +838,7 @@ write_page: ClearPageError(page); /* allocate block address */ - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); fio.op = REQ_OP_WRITE; fio.op_flags = REQ_SYNC; @@ -924,7 +924,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, bool is_dirty = PageDirty(page); retry: - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); set_page_dirty(page); if (clear_page_dirty_for_io(page)) { diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 3544c73a1290..9804cdcf976e 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -72,7 +72,7 @@ void f2fs_truncate_inline_inode(struct inode *inode, addr = inline_data_addr(inode, ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); set_page_dirty(ipage); @@ -161,7 +161,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) fio.old_blkaddr = dn->data_blkaddr; set_inode_flag(dn->inode, FI_HOT_DATA); f2fs_outplace_write_data(dn, &fio); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); if (dirty) { inode_dec_dirty_pages(dn->inode); f2fs_remove_dirty_inode(dn->inode); @@ -236,7 +236,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) f2fs_bug_on(F2FS_I_SB(inode), page->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true); + f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); src_addr = kmap_atomic(page); dst_addr = inline_data_addr(inode, dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); @@ -277,7 +277,7 @@ process_inline: ipage = f2fs_get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); src_addr = inline_data_addr(inode, npage); dst_addr = inline_data_addr(inode, ipage); @@ -391,7 +391,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, goto out; } - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); dentry_blk = page_address(page); @@ -505,7 +505,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, return 0; recover: lock_page(ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); @@ -565,7 +565,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, } } - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); name_hash = f2fs_dentry_hash(new_name, NULL); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); @@ -597,7 +597,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, int i; lock_page(page); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); inline_dentry = inline_data_addr(dir, page); make_dentry_ptr_inline(dir, &d, inline_dentry); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f63d4fe6156d..bec52961630b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -103,7 +103,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) while (start < end) { if (*start++) { - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); set_raw_inline(inode, F2FS_INODE(ipage)); @@ -497,7 +497,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) struct f2fs_inode *ri; struct extent_tree *et = F2FS_I(inode)->extent_tree; - f2fs_wait_on_page_writeback(node_page, NODE, true); + f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); f2fs_inode_synced(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b4f0cdcb9b19..6162d2cc5b16 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1106,7 +1106,7 @@ skip_partial: ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); BUG_ON(page->mapping != NODE_MAPPING(sbi)); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; set_page_dirty(page); unlock_page(page); @@ -1234,7 +1234,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) new_ni.version = 0; set_node_addr(sbi, &new_ni, NEW_ADDR, false); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(page, S_ISDIR(dn->inode->i_mode)); if (!PageUptodate(page)) @@ -1600,8 +1600,7 @@ int f2fs_move_node_page(struct page *node_page, int gc_type) .for_reclaim = 0, }; - f2fs_wait_on_page_writeback(node_page, NODE, true); - f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); + f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); @@ -1692,8 +1691,7 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(page, NODE, true); - BUG_ON(PageWriteback(page)); + f2fs_wait_on_page_writeback(page, NODE, true, true); set_fsync_mark(page, 0); set_dentry_mark(page, 0); @@ -1744,7 +1742,7 @@ continue_unlock: "Retry to write fsync mark: ino=%u, idx=%lx", ino, last_page->index); lock_page(last_page); - f2fs_wait_on_page_writeback(last_page, NODE, true); + f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); unlock_page(last_page); goto retry; @@ -1825,9 +1823,8 @@ continue_unlock: goto lock_node; } - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, true); - BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; @@ -1894,7 +1891,7 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, get_page(page); spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); - f2fs_wait_on_page_writeback(page, NODE, true); + f2fs_wait_on_page_writeback(page, NODE, true, false); if (TestClearPageError(page)) ret = -EIO; @@ -2472,7 +2469,7 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) src_addr = inline_xattr_addr(inode, page); inline_size = inline_xattr_size(inode); - f2fs_wait_on_page_writeback(ipage, NODE, true); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); memcpy(dst_addr, src_addr, inline_size); update_inode: f2fs_update_inode(inode, ipage); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 1c73d879a9bc..e05af5df5648 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -361,7 +361,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); - f2fs_wait_on_page_writeback(p, NODE, true); + f2fs_wait_on_page_writeback(p, NODE, true, true); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c22f9d0011ba..e3883db868d8 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -539,7 +539,7 @@ retry_dn: goto out; } - f2fs_wait_on_page_writeback(dn.node_page, NODE, true); + f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); err = f2fs_get_node_info(sbi, dn.nid, &ni); if (err) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 009971ea8f08..9b79056d705d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -229,7 +229,7 @@ static int __revoke_inmem_pages(struct inode *inode, lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); if (recover) { struct dnode_of_data dn; @@ -387,7 +387,7 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) if (page->mapping == inode->i_mapping) { trace_f2fs_commit_inmem_page(page, INMEM); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA, true, true); set_page_dirty(page); if (clear_page_dirty_for_io(page)) { @@ -3279,16 +3279,18 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, } void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered) + enum page_type type, bool ordered, bool locked) { if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); - if (ordered) + if (ordered) { wait_on_page_writeback(page); - else + f2fs_bug_on(sbi, locked && PageWriteback(page)); + } else { wait_for_stable_page(page); + } } } @@ -3305,7 +3307,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) cpage = find_lock_page(META_MAPPING(sbi), blkaddr); if (cpage) { - f2fs_wait_on_page_writeback(cpage, DATA, true); + f2fs_wait_on_page_writeback(cpage, DATA, true, true); f2fs_put_page(cpage, 1); } } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 7261245c208d..f44b0c38398b 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -415,7 +415,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } f2fs_wait_on_page_writeback(ipage ? ipage : in_page, - NODE, true); + NODE, true, true); /* no need to use xattr node block */ if (hsize <= inline_size) { err = f2fs_truncate_xattr_node(inode); @@ -439,7 +439,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, goto in_page_out; } f2fs_bug_on(sbi, new_nid); - f2fs_wait_on_page_writeback(xpage, NODE, true); + f2fs_wait_on_page_writeback(xpage, NODE, true, true); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); From c3a58d60faac8042df8122f40319c03b425344ba Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 26 Dec 2018 11:20:29 +0530 Subject: [PATCH 42/45] f2fs: fix use-after-free issue when accessing sbi->stat_info iput() on sbi->node_inode can update sbi->stat_info in the below context, if the f2fs_write_checkpoint() has failed with error. f2fs_balance_fs_bg+0x1ac/0x1ec f2fs_write_node_pages+0x4c/0x260 do_writepages+0x80/0xbc __writeback_single_inode+0xdc/0x4ac writeback_single_inode+0x9c/0x144 write_inode_now+0xc4/0xec iput+0x194/0x22c f2fs_put_super+0x11c/0x1e8 generic_shutdown_super+0x70/0xf4 kill_block_super+0x2c/0x5c kill_f2fs_super+0x44/0x50 deactivate_locked_super+0x60/0x8c deactivate_super+0x68/0x74 cleanup_mnt+0x40/0x78 Fix this by moving f2fs_destroy_stats() further below iput() in both f2fs_put_super() and f2fs_fill_super() paths. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9b17e367aa8c..c46a1d4318d4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1058,9 +1058,6 @@ static void f2fs_put_super(struct super_block *sb) f2fs_write_checkpoint(sbi, &cpc); } - /* f2fs_write_checkpoint can update stat informaion */ - f2fs_destroy_stats(sbi); - /* * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. @@ -1080,6 +1077,12 @@ static void f2fs_put_super(struct super_block *sb) iput(sbi->node_inode); iput(sbi->meta_inode); + /* + * iput() can update stat information, if f2fs_write_checkpoint() + * above failed with error. + */ + f2fs_destroy_stats(sbi); + /* destroy f2fs internal modules */ f2fs_destroy_node_manager(sbi); f2fs_destroy_segment_manager(sbi); @@ -3256,30 +3259,30 @@ try_onemore: f2fs_build_gc_manager(sbi); + err = f2fs_build_stats(sbi); + if (err) + goto free_nm; + /* get an inode for node space */ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); if (IS_ERR(sbi->node_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); err = PTR_ERR(sbi->node_inode); - goto free_nm; + goto free_stats; } - err = f2fs_build_stats(sbi); - if (err) - goto free_node_inode; - /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); err = PTR_ERR(root); - goto free_stats; + goto free_node_inode; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size || !root->i_nlink) { iput(root); err = -EINVAL; - goto free_stats; + goto free_node_inode; } sb->s_root = d_make_root(root); /* allocate root dentry */ @@ -3403,12 +3406,12 @@ free_meta: free_root_inode: dput(sb->s_root); sb->s_root = NULL; -free_stats: - f2fs_destroy_stats(sbi); free_node_inode: f2fs_release_ino_entry(sbi, true); truncate_inode_pages_final(NODE_MAPPING(sbi)); iput(sbi->node_inode); +free_stats: + f2fs_destroy_stats(sbi); free_nm: f2fs_destroy_node_manager(sbi); free_sm: From f03dd7151e62504e366ed2a3a81ac194f7d67249 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 26 Dec 2018 19:54:07 -0800 Subject: [PATCH 43/45] f2fs: sanity check of xattr entry size There is a security report where f2fs_getxattr() has a hole to expose wrong memory region when the image is malformed like this. f2fs_getxattr: entry->e_name_len: 4, size: 12288, buffer_size: 16384, len: 4 Cc: Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index f44b0c38398b..18d5ffbc5e8c 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -288,7 +288,7 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, - void **base_addr) + void **base_addr, int *base_size) { void *cur_addr, *txattr_addr, *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; @@ -299,8 +299,8 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (!size && !inline_size) return -ENODATA; - txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), - inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS); + *base_size = inline_size + size + XATTR_PADDING_SIZE; + txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS); if (!txattr_addr) return -ENOMEM; @@ -312,8 +312,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_inline_xattr(inode, txattr_addr, &last_addr, index, len, name); - if (*xe) + if (*xe) { + *base_size = inline_size; goto check; + } } /* read from xattr node block */ @@ -474,6 +476,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, int error = 0; unsigned int size, len; void *base_addr = NULL; + int base_size; if (name == NULL) return -EINVAL; @@ -484,7 +487,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, - &entry, &base_addr); + &entry, &base_addr, &base_size); up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -498,6 +501,11 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (buffer) { char *pval = entry->e_name + entry->e_name_len; + + if (base_size - (pval - (char *)base_addr) < size) { + error = -ERANGE; + goto out; + } memcpy(buffer, pval, size); } error = size; From 0af3fa72a38c2001e825732aea6a18a5f9ea7dad Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 28 Dec 2018 11:00:38 -0800 Subject: [PATCH 44/45] f2fs: wait on atomic writes to count F2FS_CP_WB_DATA Otherwise, we can get wrong counts incurring checkpoint hang. IO_W (CP: -24, Data: 24, Flush: ( 0 0 1), Discard: ( 0 0)) Thread A Thread B - f2fs_write_data_pages - __write_data_page - f2fs_submit_page_write - inc_page_count(F2FS_WB_DATA) type is F2FS_WB_DATA due to file is non-atomic one - f2fs_ioc_start_atomic_write - set_inode_flag(FI_ATOMIC_FILE) - f2fs_write_end_io - dec_page_count(F2FS_WB_CP_DATA) type is F2FS_WB_DATA due to file becomes atomic one Cc: Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bba56b39dcc5..ae2b45e75847 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1750,10 +1750,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (!get_dirty_pages(inode)) - goto skip_flush; - - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, + /* + * Should wait end_io to count F2FS_WB_CP_DATA correctly by + * f2fs_is_atomic_file. + */ + if (get_dirty_pages(inode)) + f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); @@ -1761,7 +1763,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } -skip_flush: + set_inode_flag(inode, FI_ATOMIC_FILE); clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); From 63c83da9b8da32cd1ac141eac9cd3d054db67504 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 1 Jan 2019 00:11:30 -0800 Subject: [PATCH 45/45] f2fs: don't access node/meta inode mapping after iput This fixes wrong access of address spaces of node and meta inodes after iput. Fixes: 60aa4d5536ab ("f2fs: fix use-after-free issue when accessing sbi->stat_info") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 19 ++++++++++++------- fs/f2fs/super.c | 5 +++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ebcc121920ba..503fde8349e6 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -96,8 +96,10 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->free_secs = free_sections(sbi); si->prefree_count = prefree_segments(sbi); si->dirty_count = dirty_segments(sbi); - si->node_pages = NODE_MAPPING(sbi)->nrpages; - si->meta_pages = META_MAPPING(sbi)->nrpages; + if (sbi->node_inode) + si->node_pages = NODE_MAPPING(sbi)->nrpages; + if (sbi->meta_inode) + si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; si->sits = MAIN_SEGS(sbi); @@ -175,7 +177,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi) static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned npages; int i; if (si->base_mem) @@ -258,10 +259,14 @@ get_cache: sizeof(struct extent_node); si->page_mem = 0; - npages = NODE_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; - npages = META_MAPPING(sbi)->nrpages; - si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + if (sbi->node_inode) { + unsigned npages = NODE_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } + if (sbi->meta_inode) { + unsigned npages = META_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; + } } static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c46a1d4318d4..14f033e1ab42 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1075,7 +1075,10 @@ static void f2fs_put_super(struct super_block *sb) f2fs_bug_on(sbi, sbi->fsync_node_num); iput(sbi->node_inode); + sbi->node_inode = NULL; + iput(sbi->meta_inode); + sbi->meta_inode = NULL; /* * iput() can update stat information, if f2fs_write_checkpoint() @@ -3410,6 +3413,7 @@ free_node_inode: f2fs_release_ino_entry(sbi, true); truncate_inode_pages_final(NODE_MAPPING(sbi)); iput(sbi->node_inode); + sbi->node_inode = NULL; free_stats: f2fs_destroy_stats(sbi); free_nm: @@ -3422,6 +3426,7 @@ free_devices: free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); + sbi->meta_inode = NULL; free_io_dummy: mempool_destroy(sbi->write_io_dummy); free_percpu: