From 0b1504c8e8d2e2e1d78dd48cda0884bf9721f4fb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 20 May 2019 10:09:22 +0800 Subject: [PATCH 01/47] f2fs: fix to check layout on last valid checkpoint park As Ju Hyung reported: " I was semi-forced today to use the new kernel and test f2fs. My Ubuntu initramfs got a bit wonky and I had to boot into live CD and fix some stuffs. The live CD was using 4.15 kernel, and just mounting the f2fs partition there corrupted f2fs and my 4.19(with 5.1-rc1-4.19 f2fs-stable merged) refused to mount with "SIT is corrupted node" message. I used the latest f2fs-tools sent by Chao including "fsck.f2fs: fix to repair cp_loads blocks at correct position" It spit out 140M worth of output, but at least I didn't have to run it twice. Everything returned "Ok" in the 2nd run. The new log is at http://arter97.com/f2fs/final After fixing the image, I used my 4.19 kernel with 5.2-rc1-4.19 f2fs-stable merged and it mounted. But, I got this: [ 1.047791] F2FS-fs (nvme0n1p3): layout of large_nat_bitmap is deprecated, run fsck to repair, chksum_offset: 4092 [ 1.081307] F2FS-fs (nvme0n1p3): Found nat_bits in checkpoint [ 1.161520] F2FS-fs (nvme0n1p3): recover fsync data on readonly fs [ 1.162418] F2FS-fs (nvme0n1p3): Mounted with checkpoint version = 761c7e00 But after doing a reboot, the message is gone: [ 1.098423] F2FS-fs (nvme0n1p3): Found nat_bits in checkpoint [ 1.177771] F2FS-fs (nvme0n1p3): recover fsync data on readonly fs [ 1.178365] F2FS-fs (nvme0n1p3): Mounted with checkpoint version = 761c7eda I'm not exactly sure why the kernel detected that I'm still using the old layout on the first boot. Maybe fsck didn't fix it properly, or the check from the kernel is improper. " Although we have rebuild the old deprecated checkpoint with new layout during repair, we only repair last checkpoint park, the other old one is remained. Once the image was mounted, we will 1) sanity check layout and 2) decide which checkpoint park to use according to cp_ver. So that we will print reported message unnecessarily at step 1), to avoid it, we simply move layout check into f2fs_sanity_check_ckpt() after step 2). Reported-by: Park Ju Hyung Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 11 ----------- fs/f2fs/super.c | 9 +++++++++ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9c07b71037ee..89825261d474 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -832,17 +832,6 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, return -EINVAL; } - if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) { - if (crc_offset != CP_MIN_CHKSUM_OFFSET) { - f2fs_put_page(*cp_page, 1); - f2fs_msg(sbi->sb, KERN_WARNING, - "layout of large_nat_bitmap is deprecated, " - "run fsck to repair, chksum_offset: %zu", - crc_offset); - return -EINVAL; - } - } - crc = f2fs_checkpoint_chksum(sbi, *cp_block); if (crc != cur_cp_crc(*cp_block)) { f2fs_put_page(*cp_page, 1); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d8de3bfad4fb..b23a2715a4a0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2723,6 +2723,15 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) return 1; } + if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) && + le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) { + f2fs_msg(sbi->sb, KERN_WARNING, + "layout of large_nat_bitmap is deprecated, " + "run fsck to repair, chksum_offset: %u", + le32_to_cpu(ckpt->checksum_offset)); + return 1; + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; From ca5364759f2a2ed0da6716c094571f573f4e9ca8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2019 15:54:49 -0700 Subject: [PATCH 02/47] f2fs: allow ssr block allocation during checkpoint=disable period This patch allows to use ssr during checkpoint is disabled. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 963fb4571fd9..1e029da26053 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -387,7 +387,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, goto next; /* Don't touch checkpointed data */ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && - get_ckpt_valid_blocks(sbi, segno))) + get_ckpt_valid_blocks(sbi, segno) && + p.alloc_mode != SSR)) goto next; if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; From afc60025d476e64cc8ba0d475f21fa71dba871af Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 19 Feb 2019 16:15:29 +0800 Subject: [PATCH 03/47] f2fs: add bio cache for IPU SQLite in Wal mode may trigger sequential IPU write in db-wal file, after commit d1b3e72d5490 ("f2fs: submit bio of in-place-update pages"), we lost the chance of merging page in inner managed bio cache, result in submitting more small-sized IO. So let's add temporary bio in writepages() to cache mergeable write IO as much as possible. Test case: 1. xfs_io -f /mnt/f2fs/file -c "pwrite 0 65536" -c "fsync" 2. xfs_io -f /mnt/f2fs/file -c "pwrite 0 65536" -c "fsync" Before: f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65544, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65552, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65560, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65568, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65576, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65584, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65592, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65600, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65608, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65616, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65624, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65632, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65640, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65648, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65656, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65664, size = 4096 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), NODE, sector = 57352, size = 4096 After: f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), DATA, sector = 65544, size = 65536 f2fs_submit_write_bio: dev = (251,0)/(251,0), rw = WRITE(S), NODE, sector = 57368, size = 4096 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 88 ++++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/f2fs.h | 3 ++ fs/f2fs/segment.c | 5 ++- 3 files changed, 86 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5cfe182e8922..e491e872cb47 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -347,20 +347,20 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, +static bool __has_merged_page(struct bio *bio, struct inode *inode, struct page *page, nid_t ino) { struct bio_vec *bvec; struct page *target; int i; - if (!io->bio) + if (!bio) return false; if (!inode && !page && !ino) return true; - bio_for_each_segment_all(bvec, io->bio, i) { + bio_for_each_segment_all(bvec, bio, i) { if (bvec->bv_page->mapping) target = bvec->bv_page; @@ -411,7 +411,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct f2fs_bio_info *io = sbi->write_io[btype] + temp; down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, page, ino); + ret = __has_merged_page(io->bio, inode, page, ino); up_read(&io->io_rwsem); } if (ret) @@ -481,6 +481,61 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) return 0; } +int f2fs_merge_page_bio(struct f2fs_io_info *fio) +{ + struct bio *bio = *fio->bio; + struct page *page = fio->encrypted_page ? + fio->encrypted_page : fio->page; + + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, + __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + return -EFAULT; + + trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(fio, 0); + + if (bio && (*fio->last_block + 1 != fio->new_blkaddr || + !__same_bdev(fio->sbi, fio->new_blkaddr, bio))) { + __submit_bio(fio->sbi, bio, fio->type); + bio = NULL; + } +alloc_new: + if (!bio) { + bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, + BIO_MAX_PAGES, false, fio->type, fio->temp); + bio_set_op_attrs(bio, fio->op, fio->op_flags); + } + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + __submit_bio(fio->sbi, bio, fio->type); + bio = NULL; + goto alloc_new; + } + + if (fio->io_wbc) + wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + + inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + + *fio->last_block = fio->new_blkaddr; + *fio->bio = bio; + + return 0; +} + +static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio, + struct page *page) +{ + if (!bio) + return; + + if (!__has_merged_page(*bio, NULL, page, 0)) + return; + + __submit_bio(sbi, *bio, DATA); + *bio = NULL; +} + void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; @@ -1947,6 +2002,8 @@ out: } static int __write_data_page(struct page *page, bool *submitted, + struct bio **bio, + sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type) { @@ -1972,6 +2029,8 @@ static int __write_data_page(struct page *page, bool *submitted, .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, + .bio = bio, + .last_block = last_block, }; trace_f2fs_writepage(page, DATA); @@ -2070,10 +2129,13 @@ out: unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task) + !F2FS_I(inode)->cp_task) { + f2fs_submit_ipu_bio(sbi, bio, page); f2fs_balance_fs(sbi, need_balance_fs); + } if (unlikely(f2fs_cp_error(sbi))) { + f2fs_submit_ipu_bio(sbi, bio, page); f2fs_submit_merged_write(sbi, DATA); submitted = NULL; } @@ -2100,7 +2162,7 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, NULL, wbc, FS_DATA_IO); + return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO); } /* @@ -2116,6 +2178,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int done = 0; struct pagevec pvec; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct bio *bio = NULL; + sector_t last_block; int nr_pages; pgoff_t uninitialized_var(writeback_index); pgoff_t index; @@ -2192,17 +2256,20 @@ continue_unlock: } if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) + if (wbc->sync_mode != WB_SYNC_NONE) { f2fs_wait_on_page_writeback(page, DATA, true, true); - else + f2fs_submit_ipu_bio(sbi, &bio, page); + } else { goto continue_unlock; + } } if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, &submitted, wbc, io_type); + ret = __write_data_page(page, &submitted, &bio, + &last_block, wbc, io_type); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to @@ -2251,6 +2318,9 @@ continue_unlock: if (nwritten) f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, NULL, 0, DATA); + /* submit cached bio of IPU write */ + if (bio) + __submit_bio(sbi, bio, DATA); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a440d7df6f41..900cbea7f759 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1052,6 +1052,8 @@ struct f2fs_io_info { bool retry; /* need to reallocate block address */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ + struct bio **bio; /* bio for ipu */ + sector_t *last_block; /* last block number in bio */ unsigned char version; /* version of the node */ }; @@ -3170,6 +3172,7 @@ void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, nid_t ino, enum page_type type); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); +int f2fs_merge_page_bio(struct f2fs_io_info *fio); void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8dee063c833f..dbc96b2a05fa 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3202,7 +3202,10 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); - err = f2fs_submit_page_bio(fio); + if (fio->bio) + err = f2fs_merge_page_bio(fio); + else + err = f2fs_submit_page_bio(fio); if (!err) { update_device_state(fio); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); From dcc6b712c66162f33ccb0c3780db5b25cfa51929 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 14 May 2019 15:36:23 +0900 Subject: [PATCH 04/47] f2fs: always assume that the device is idle under gc_urgent This allows more aggressive discards and balancing job to be done under gc_urgent. Signed-off-by: Park Ju Hyung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 900cbea7f759..e3818fbf853b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2194,6 +2194,9 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { + if (sbi->gc_mode == GC_URGENT) + return true; + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || get_pages(sbi, F2FS_WB_CP_DATA) || From 9448078cc5760e1be9d232218ffa5c73053ffd5f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 20 May 2019 17:36:59 +0800 Subject: [PATCH 05/47] f2fs: fix to avoid deadloop if data_flush is on As Hagbard Celine reported: [ 615.697824] INFO: task kworker/u16:5:344 blocked for more than 120 seconds. [ 615.697825] Not tainted 5.0.15-gentoo-f2fslog #4 [ 615.697826] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 615.697827] kworker/u16:5 D 0 344 2 0x80000000 [ 615.697831] Workqueue: writeback wb_workfn (flush-259:0) [ 615.697832] Call Trace: [ 615.697836] ? __schedule+0x2c5/0x8b0 [ 615.697839] schedule+0x32/0x80 [ 615.697841] schedule_preempt_disabled+0x14/0x20 [ 615.697842] __mutex_lock.isra.8+0x2ba/0x4d0 [ 615.697845] ? log_store+0xf5/0x260 [ 615.697848] f2fs_write_data_pages+0x133/0x320 [ 615.697851] ? trace_hardirqs_on+0x2c/0xe0 [ 615.697854] do_writepages+0x41/0xd0 [ 615.697857] __filemap_fdatawrite_range+0x81/0xb0 [ 615.697859] f2fs_sync_dirty_inodes+0x1dd/0x200 [ 615.697861] f2fs_balance_fs_bg+0x2a7/0x2c0 [ 615.697863] ? up_read+0x5/0x20 [ 615.697865] ? f2fs_do_write_data_page+0x2cb/0x940 [ 615.697867] f2fs_balance_fs+0xe5/0x2c0 [ 615.697869] __write_data_page+0x1c8/0x6e0 [ 615.697873] f2fs_write_cache_pages+0x1e0/0x450 [ 615.697878] f2fs_write_data_pages+0x14b/0x320 [ 615.697880] ? trace_hardirqs_on+0x2c/0xe0 [ 615.697883] do_writepages+0x41/0xd0 [ 615.697885] __filemap_fdatawrite_range+0x81/0xb0 [ 615.697887] f2fs_sync_dirty_inodes+0x1dd/0x200 [ 615.697889] f2fs_balance_fs_bg+0x2a7/0x2c0 [ 615.697891] f2fs_write_node_pages+0x51/0x220 [ 615.697894] do_writepages+0x41/0xd0 [ 615.697897] __writeback_single_inode+0x3d/0x3d0 [ 615.697899] writeback_sb_inodes+0x1e8/0x410 [ 615.697902] __writeback_inodes_wb+0x5d/0xb0 [ 615.697904] wb_writeback+0x28f/0x340 [ 615.697906] ? cpumask_next+0x16/0x20 [ 615.697908] wb_workfn+0x33e/0x420 [ 615.697911] process_one_work+0x1a1/0x3d0 [ 615.697913] worker_thread+0x30/0x380 [ 615.697915] ? process_one_work+0x3d0/0x3d0 [ 615.697916] kthread+0x116/0x130 [ 615.697918] ? kthread_create_worker_on_cpu+0x70/0x70 [ 615.697921] ret_from_fork+0x3a/0x50 There is still deadloop in below condition: d A - do_writepages - f2fs_write_node_pages - f2fs_balance_fs_bg - f2fs_sync_dirty_inodes - f2fs_write_cache_pages - mutex_lock(&sbi->writepages) -- lock once - __write_data_page - f2fs_balance_fs_bg - f2fs_sync_dirty_inodes - f2fs_write_data_pages - mutex_lock(&sbi->writepages) -- lock again Thread A Thread B - do_writepages - f2fs_write_node_pages - f2fs_balance_fs_bg - f2fs_sync_dirty_inodes - .cp_task = current - f2fs_sync_dirty_inodes - .cp_task = current - filemap_fdatawrite - .cp_task = NULL - filemap_fdatawrite - f2fs_write_cache_pages - enter f2fs_balance_fs_bg since .cp_task is NULL - .cp_task = NULL Change as below to avoid this: - add condition to avoid holding .writepages mutex lock in path of data flush - introduce mutex lock sbi.flush_lock to exclude concurrent data flush in background. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 4 ++++ fs/f2fs/super.c | 1 + 4 files changed, 9 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e491e872cb47..8d5c38a14b3f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2332,6 +2332,9 @@ static inline bool __should_serialize_io(struct inode *inode, return false; if (IS_NOQUOTA(inode)) return false; + /* to avoid deadlock in path of data flush */ + if (F2FS_I(inode)->cp_task) + return false; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e3818fbf853b..4fac140e1a0e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1209,6 +1209,7 @@ struct f2fs_sb_info { /* for inode management */ struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */ spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */ + struct mutex flush_lock; /* for flush exclusion */ /* for extent tree cache */ struct radix_tree_root extent_tree_root;/* cache extent cache entries */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dbc96b2a05fa..5f6e4cd2eff2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -546,9 +546,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (test_opt(sbi, DATA_FLUSH)) { struct blk_plug plug; + mutex_lock(&sbi->flush_lock); + blk_start_plug(&plug); f2fs_sync_dirty_inodes(sbi, FILE_INODE); blk_finish_plug(&plug); + + mutex_unlock(&sbi->flush_lock); } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b23a2715a4a0..58f84fac9d61 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3298,6 +3298,7 @@ try_onemore: INIT_LIST_HEAD(&sbi->inode_list[i]); spin_lock_init(&sbi->inode_lock[i]); } + mutex_init(&sbi->flush_lock); f2fs_init_extent_cache_info(sbi); From 6e30778fe3ddc0c73e5867ecf7514db2e7d9912f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 May 2019 20:08:40 -0700 Subject: [PATCH 06/47] f2fs: add missing sysfs entries in documentation This patch cleans up documentation to cover missing sysfs entries. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 89 +++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index f7b5e4ff0de3..66aca042988e 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -246,11 +246,14 @@ Files in /sys/fs/f2fs/ .............................................................................. File Content - gc_max_sleep_time This tuning parameter controls the maximum sleep + gc_urgent_sleep_time This parameter controls sleep time for gc_urgent. + 500 ms is set by default. See above gc_urgent. + + gc_min_sleep_time This tuning parameter controls the minimum sleep time for the garbage collection thread. Time is in milliseconds. - gc_min_sleep_time This tuning parameter controls the minimum sleep + gc_max_sleep_time This tuning parameter controls the maximum sleep time for the garbage collection thread. Time is in milliseconds. @@ -270,9 +273,6 @@ Files in /sys/fs/f2fs/ to 1, background thread starts to do GC by given gc_urgent_sleep_time interval. - gc_urgent_sleep_time This parameter controls sleep time for gc_urgent. - 500 ms is set by default. See above gc_urgent. - reclaim_segments This parameter controls the number of prefree segments to be reclaimed. If the number of prefree segments is larger than the number of segments @@ -287,7 +287,16 @@ Files in /sys/fs/f2fs/ checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. - trim_sections This parameter controls the number of sections + discard_granularity This parameter controls the granularity of discard + command size. It will issue discard commands iif + the size is larger than given granularity. Its + unit size is 4KB, and 4 (=16KB) is set by default. + The maximum value is 128 (=512KB). + + reserved_blocks This parameter indicates the number of blocks that + f2fs reserves internally for root. + + batched_trim_sections This parameter controls the number of sections to be trimmed out in batch mode when FITRIM conducts. 32 sections is set by default. @@ -309,11 +318,35 @@ Files in /sys/fs/f2fs/ the number is less than this value, it triggers in-place-updates. + min_seq_blocks This parameter controls the threshold to serialize + write IOs issued by multiple threads in parallel. + + min_hot_blocks This parameter controls the threshold to allocate + a hot data log for pending data blocks to write. + + min_ssr_sections This parameter adds the threshold when deciding + SSR block allocation. If this is large, SSR mode + will be enabled early. + + ram_thresh This parameter controls the memory footprint used + by free nids and cached nat entries. By default, + 10 is set, which indicates 10 MB / 1 GB RAM. + + ra_nid_pages When building free nids, F2FS reads NAT blocks + ahead for speed up. Default is 0. + + dirty_nats_ratio Given dirty ratio of cached nat entries, F2FS + determines flushing them in background. + max_victim_search This parameter controls the number of trials to find a victim segment when conducting SSR and cleaning operations. The default value is 4096 which covers 8GB block address range. + migration_granularity For large-sized sections, F2FS can stop GC given + this granularity instead of reclaiming entire + section. + dir_level This parameter controls the directory level to support large directory. If a directory has a number of files, it can reduce the file lookup @@ -321,9 +354,47 @@ Files in /sys/fs/f2fs/ Otherwise, it needs to decrease this value to reduce the space overhead. The default value is 0. - ram_thresh This parameter controls the memory footprint used - by free nids and cached nat entries. By default, - 10 is set, which indicates 10 MB / 1 GB RAM. + cp_interval F2FS tries to do checkpoint periodically, 60 secs + by default. + + idle_interval F2FS detects system is idle, if there's no F2FS + operations during given interval, 5 secs by + default. + + discard_idle_interval F2FS detects the discard thread is idle, given + time interval. Default is 5 secs. + + gc_idle_interval F2FS detects the GC thread is idle, given time + interval. Default is 5 secs. + + umount_discard_timeout When unmounting the disk, F2FS waits for finishing + queued discard commands which can take huge time. + This gives time out for it, 5 secs by default. + + iostat_enable This controls to enable/disable iostat in F2FS. + + readdir_ra This enables/disabled readahead of inode blocks + in readdir, and default is enabled. + + gc_pin_file_thresh This indicates how many GC can be failed for the + pinned file. If it exceeds this, F2FS doesn't + guarantee its pinning state. 2048 trials is set + by default. + + extension_list This enables to change extension_list for hot/cold + files in runtime. + + inject_rate This controls injection rate of arbitrary faults. + + inject_type This controls injection type of arbitrary faults. + + dirty_segments This shows # of dirty segments. + + lifetime_write_kbytes This shows # of data written to the disk. + + features This shows current features enabled on F2FS. + + current_reserved_blocks This shows # of blocks currently reserved. ================================================================================ USAGE From c297bc0e70eb55e2596c031cf436250a4b83527d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 May 2019 23:07:25 +0800 Subject: [PATCH 07/47] f2fs: fix to do sanity check on segment bitmap of LFS curseg As Jungyeon Reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203233 - Reproduces gcc poc_13.c ./run.sh f2fs - Kernel messages F2FS-fs (sdb): Bitmap was wrongly set, blk:4608 kernel BUG at fs/f2fs/segment.c:2133! RIP: 0010:update_sit_entry+0x35d/0x3e0 Call Trace: f2fs_allocate_data_block+0x16c/0x5a0 do_write_page+0x57/0x100 f2fs_do_write_node_page+0x33/0xa0 __write_node_page+0x270/0x4e0 f2fs_sync_node_pages+0x5df/0x670 f2fs_write_checkpoint+0x364/0x13a0 f2fs_sync_fs+0xa3/0x130 f2fs_do_sync_file+0x1a6/0x810 do_fsync+0x33/0x60 __x64_sys_fsync+0xb/0x10 do_syscall_64+0x43/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The testcase fails because that, in fuzzed image, current segment was allocated with LFS type, its .next_blkoff should point to an unused block address, but actually, its bitmap shows it's not. So during allocation, f2fs crash when setting bitmap. Introducing sanity_check_curseg() to check such inconsistence of current in-used segment. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5f6e4cd2eff2..a034e0da004a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4239,6 +4239,41 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) return init_victim_secmap(sbi); } +static int sanity_check_curseg(struct f2fs_sb_info *sbi) +{ + int i; + + /* + * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr; + * In LFS curseg, all blkaddr after .next_blkoff should be unused. + */ + for (i = 0; i < NO_CHECK_TYPE; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + struct seg_entry *se = get_seg_entry(sbi, curseg->segno); + unsigned int blkofs = curseg->next_blkoff; + + if (f2fs_test_bit(blkofs, se->cur_valid_map)) + goto out; + + if (curseg->alloc_type == SSR) + continue; + + for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) { + if (!f2fs_test_bit(blkofs, se->cur_valid_map)) + continue; +out: + f2fs_msg(sbi->sb, KERN_ERR, + "Current segment's next free block offset is " + "inconsistent with bitmap, logtype:%u, " + "segno:%u, type:%u, next_blkoff:%u, blkofs:%u", + i, curseg->segno, curseg->alloc_type, + curseg->next_blkoff, blkofs); + return -EINVAL; + } + } + return 0; +} + /* * Update min, max modified time for cost-benefit GC algorithm */ @@ -4334,6 +4369,10 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) if (err) return err; + err = sanity_check_curseg(sbi); + if (err) + return err; + init_min_max_mtime(sbi); return 0; } From 5b25019dcd4f1bf3bd2421526956bf504f3dc84c Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Thu, 23 May 2019 09:49:17 +0530 Subject: [PATCH 08/47] f2fs: add error prints for debugging mount failure Add error prints to get more details on the mount failure. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++++- fs/f2fs/super.c | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a034e0da004a..51f57393ad5b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3537,8 +3537,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) /* sanity check for summary blocks */ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || - sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) + sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { + f2fs_msg(sbi->sb, KERN_ERR, + "invalid journal entries nats %u sits %u\n", + nats_in_cursum(nat_j), sits_in_cursum(sit_j)); return -EINVAL; + } return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 58f84fac9d61..c64a4636d1a0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3310,13 +3310,13 @@ try_onemore: err = f2fs_build_segment_manager(sbi); if (err) { f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS segment manager"); + "Failed to initialize F2FS segment manager (%d)", err); goto free_sm; } err = f2fs_build_node_manager(sbi); if (err) { f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS node manager"); + "Failed to initialize F2FS node manager (%d)", err); goto free_nm; } From ada14a86a32b69bc56ea69f3eafc4b0aa91d5bed Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 24 May 2019 14:38:39 +0530 Subject: [PATCH 09/47] f2fs: fix f2fs_show_options to show nodiscard mount option Fix f2fs_show_options to show nodiscard mount option. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c64a4636d1a0..4210579eb582 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1318,6 +1318,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); + else + seq_puts(seq, ",nodiscard"); if (test_opt(sbi, NOHEAP)) seq_puts(seq, ",no_heap"); else From b9268af146372ef164c8133e9ad96d4a1606f040 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 28 May 2019 17:23:33 +0800 Subject: [PATCH 10/47] f2fs: fix sparse warning make C=2 CHECKFLAGS="-D__CHECK_ENDIAN__" CHECK dir.c dir.c:842:50: warning: cast from restricted __le32 CHECK node.c node.c:2759:40: warning: restricted __le32 degrades to integer Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 4 ++-- fs/f2fs/node.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 6dd1e4a2daf0..32ca1602c898 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -828,8 +828,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, int save_len = fstr->len; err = fscrypt_fname_disk_to_usr(d->inode, - (u32)de->hash_code, 0, - &de_name, fstr); + (u32)le32_to_cpu(de->hash_code), + 0, &de_name, fstr); if (err) goto out; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 56fe5b97c1ba..4e3628e96318 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2725,7 +2725,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, i = 1; } for (; i < NAT_ENTRY_PER_BLOCK; i++) { - if (nat_blk->entries[i].block_addr != NULL_ADDR) + if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR) valid++; } if (valid == 0) { From 3ac4020f78bb5f6fabf3e1d8945ec84f4c8dae87 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 29 May 2019 17:49:03 -0700 Subject: [PATCH 11/47] f2fs: Lower threshold for disable_cp_again The existing threshold for allowable holes at checkpoint=disable time is too high. The OVP space contains reserved segments, which are always in the form of free segments. These must be subtracted from the OVP value. The current threshold is meant to be the maximum value of holes of a single type we can have and still guarantee that we can fill the disk without failing to find space for a block of a given type. If the disk is full, ignoring current reserved, which only helps us, the amount of unused blocks is equal to the OVP area. Of that, there are reserved segments, which must be free segments, and the rest of the ovp area, which can come from either free segments or holes. The maximum possible amount of holes is OVP-reserved. Now, consider the disk when mounting with checkpoint=disable. We must be able to fill all available free space with either data or node blocks. When we start with checkpoint=disable, holes are locked to their current type. Say we have H of one type of hole, and H+X of the other. We can fill H of that space with arbitrary typed blocks via SSR. For the remaining H+X blocks, we may not have any of a given block type left at all. For instance, if we were to fill the disk entirely with blocks of the type with fewer holes, the H+X blocks of the opposite type would not be used. If H+X > OVP-reserved, there would be more holes than could possibly exist, and we would have failed to find a suitable block earlier on, leading to a crash in update_sit_entry. If H+X <= OVP-reserved, then the holes end up effectively masked by the OVP region in this case. Signed-off-by: Daniel Rosenberg Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 51f57393ad5b..71f8913aa5f9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -876,7 +876,9 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg; + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; block_t holes[2] = {0, 0}; /* DATA and NODE */ struct seg_entry *se; unsigned int segno; @@ -891,10 +893,10 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) } mutex_unlock(&dirty_i->seglist_lock); - if (holes[DATA] > ovp || holes[NODE] > ovp) + if (holes[DATA] > ovp_holes || holes[NODE] > ovp_holes) return -EAGAIN; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && - dirty_segments(sbi) > overprovision_segments(sbi)) + dirty_segments(sbi) > ovp_hole_segs) return -EAGAIN; return 0; } From d38f8be11bdf31024203a39ee4f0a08afd84ab90 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 29 May 2019 17:49:04 -0700 Subject: [PATCH 12/47] f2fs: Fix root reserved on remount On a remount, you can currently set root reserved if it was not previously set. This can cause an underflow if reserved has been set to a very high value, since then root reserved + current reserved could be greater than user_block_count. inc_valid_block_count later subtracts out these values from user_block_count, causing an underflow. Signed-off-by: Daniel Rosenberg Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4210579eb582..68a3d99f6c27 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -213,7 +213,8 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) static inline void limit_reserve_root(struct f2fs_sb_info *sbi) { - block_t limit = (sbi->user_block_count << 1) / 1000; + block_t limit = min((sbi->user_block_count << 1) / 1000, + sbi->user_block_count - sbi->reserved_blocks); /* limit is 0.2% */ if (test_opt(sbi, RESERVE_ROOT) && From c8dfd47f67aba45f29e849da5c91bb9e1d096d10 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 29 May 2019 17:49:05 -0700 Subject: [PATCH 13/47] f2fs: Fix accounting for unusable blocks Fixes possible underflows when dealing with unusable blocks. Signed-off-by: Daniel Rosenberg Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4fac140e1a0e..af1cf2ee9375 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1770,8 +1770,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - avail_user_block_count -= sbi->unusable_block_count; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (avail_user_block_count > sbi->unusable_block_count) + avail_user_block_count -= sbi->unusable_block_count; + else + avail_user_block_count = 0; + } if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) @@ -1971,7 +1975,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { block_t valid_block_count; - unsigned int valid_node_count; + unsigned int valid_node_count, user_block_count; int err; if (is_inode) { @@ -1998,10 +2002,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + user_block_count = sbi->user_block_count; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - valid_block_count += sbi->unusable_block_count; + user_block_count -= sbi->unusable_block_count; - if (unlikely(valid_block_count > sbi->user_block_count)) { + if (unlikely(valid_block_count > user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } From 77d88aa991eab9287863269a9072445903d12fe5 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Wed, 29 May 2019 17:49:06 -0700 Subject: [PATCH 14/47] f2fs: Add option to limit required GC for checkpoint=disable This extends the checkpoint option to allow checkpoint=disable:%u[%] This allows you to specify what how much of the disk you are willing to lose access to while mounting with checkpoint=disable. If the amount lost would be higher, the mount will return -EAGAIN. This can be given as a percent of total space, or in blocks. Currently, we need to run garbage collection until the amount of holes is smaller than the OVP space. With the new option, f2fs can mark space as unusable up front instead of requiring garbage collection until the number of holes is small enough. Signed-off-by: Daniel Rosenberg Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 8 ++++ Documentation/filesystems/f2fs.txt | 19 +++++++- fs/f2fs/f2fs.h | 6 ++- fs/f2fs/segment.c | 17 +++++-- fs/f2fs/super.c | 59 ++++++++++++++++--------- fs/f2fs/sysfs.c | 16 +++++++ 6 files changed, 99 insertions(+), 26 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 91822ce25831..dca326e0ee3e 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -243,3 +243,11 @@ Description: - Del: echo '[h/c]!extension' > /sys/fs/f2fs//extension_list - [h] means add/del hot file extension - [c] means add/del cold file extension + +What: /sys/fs/f2fs//unusable +Date April 2019 +Contact: "Daniel Rosenberg" +Description: + If checkpoint=disable, it displays the number of blocks that are unusable. + If checkpoint=enable it displays the enumber of blocks that would be unusable + if checkpoint=disable were to be set. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 66aca042988e..bebd1be3ba49 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -214,11 +214,22 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix", non-atomic files likewise "nobarrier" mount option. test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt context. The fake fscrypt context is used by xfstests. -checkpoint=%s Set to "disable" to turn off checkpointing. Set to "enable" +checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable" to reenable checkpointing. Is enabled by default. While disabled, any unmounting or unexpected shutdowns will cause the filesystem contents to appear as they did when the filesystem was mounted with that option. + While mounting with checkpoint=disabled, the filesystem must + run garbage collection to ensure that all available space can + be used. If this takes too much time, the mount may return + EAGAIN. You may optionally add a value to indicate how much + of the disk you would be willing to temporarily give up to + avoid additional garbage collection. This can be given as a + number of blocks, or as a percent. For instance, mounting + with checkpoint=disable:100% would always succeed, but it may + hide up to all remaining free space. The actual space that + would be unusable can be viewed at /sys/fs/f2fs//unusable + This space is reclaimed once checkpoint=enable. ================================================================================ DEBUGFS ENTRIES @@ -396,6 +407,12 @@ Files in /sys/fs/f2fs/ current_reserved_blocks This shows # of blocks currently reserved. + unusable If checkpoint=disable, this shows the number of + blocks that are unusable. + If checkpoint=enable it shows the number of blocks + that would be unusable if checkpoint=disable were + to be set. + ================================================================================ USAGE ================================================================================ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index af1cf2ee9375..edc848152553 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -136,6 +136,9 @@ struct f2fs_mount_info { int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ bool test_dummy_encryption; /* test dummy encryption */ + block_t unusable_cap; /* Amount of space allowed to be + * unusable when disabling checkpoint + */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -3086,7 +3089,8 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); -int f2fs_disable_cp_again(struct f2fs_sb_info *sbi); +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 71f8913aa5f9..54a3e398d1ea 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -873,13 +873,14 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) { - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); int ovp_hole_segs = (overprovision_segments(sbi) - reserved_segments(sbi)); block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); block_t holes[2] = {0, 0}; /* DATA and NODE */ + block_t unusable; struct seg_entry *se; unsigned int segno; @@ -893,7 +894,17 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) } mutex_unlock(&dirty_i->seglist_lock); - if (holes[DATA] > ovp_holes || holes[NODE] > ovp_holes) + unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE]; + if (unusable > ovp_holes) + return unusable - ovp_holes; + return 0; +} + +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable) +{ + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + if (unusable > F2FS_OPTION(sbi).unusable_cap) return -EAGAIN; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && dirty_segments(sbi) > ovp_hole_segs) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 68a3d99f6c27..662ceeb4152f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -136,7 +136,10 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, - Opt_checkpoint, + Opt_checkpoint_disable, + Opt_checkpoint_disable_cap, + Opt_checkpoint_disable_cap_perc, + Opt_checkpoint_enable, Opt_err, }; @@ -195,7 +198,10 @@ static match_table_t f2fs_tokens = { {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, - {Opt_checkpoint, "checkpoint=%s"}, + {Opt_checkpoint_disable, "checkpoint=disable"}, + {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, + {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, + {Opt_checkpoint_enable, "checkpoint=enable"}, {Opt_err, NULL}, }; @@ -772,22 +778,30 @@ static int parse_options(struct super_block *sb, char *options) "Test dummy encryption mount option ignored"); #endif break; - case Opt_checkpoint: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - if (strlen(name) == 6 && - !strncmp(name, "enable", 6)) { - clear_opt(sbi, DISABLE_CHECKPOINT); - } else if (strlen(name) == 7 && - !strncmp(name, "disable", 7)) { - set_opt(sbi, DISABLE_CHECKPOINT); - } else { - kvfree(name); + case Opt_checkpoint_disable_cap_perc: + if (args->from && match_int(args, &arg)) return -EINVAL; - } - kvfree(name); + if (arg < 0 || arg > 100) + return -EINVAL; + if (arg == 100) + F2FS_OPTION(sbi).unusable_cap = + sbi->user_block_count; + else + F2FS_OPTION(sbi).unusable_cap = + (sbi->user_block_count / 100) * arg; + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_disable_cap: + if (args->from && match_int(args, &arg)) + return -EINVAL; + F2FS_OPTION(sbi).unusable_cap = arg; + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_disable: + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_enable: + clear_opt(sbi, DISABLE_CHECKPOINT); break; default: f2fs_msg(sb, KERN_ERR, @@ -1417,8 +1431,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",alloc_mode=%s", "reuse"); if (test_opt(sbi, DISABLE_CHECKPOINT)) - seq_puts(seq, ",checkpoint=disable"); - + seq_printf(seq, ",checkpoint=disable:%u", + F2FS_OPTION(sbi).unusable_cap); if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) seq_printf(seq, ",fsync_mode=%s", "posix"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) @@ -1447,6 +1461,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); clear_opt(sbi, DISABLE_CHECKPOINT); + F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); @@ -1475,6 +1490,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) struct cp_control cpc; int err = 0; int ret; + block_t unusable; if (s_flags & SB_RDONLY) { f2fs_msg(sbi->sb, KERN_ERR, @@ -1502,7 +1518,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - if (f2fs_disable_cp_again(sbi)) { + unusable = f2fs_get_unusable_blocks(sbi); + if (f2fs_disable_cp_again(sbi, unusable)) { err = -EAGAIN; goto restore_flag; } @@ -1515,7 +1532,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto out_unlock; spin_lock(&sbi->stat_lock); - sbi->unusable_block_count = 0; + sbi->unusable_block_count = unusable; spin_unlock(&sbi->stat_lock); out_unlock: diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 729f46a3c9ee..fa184880cff3 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -68,6 +68,20 @@ static ssize_t dirty_segments_show(struct f2fs_attr *a, (unsigned long long)(dirty_segments(sbi))); } +static ssize_t unusable_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + block_t unusable; + + if (test_opt(sbi, DISABLE_CHECKPOINT)) + unusable = sbi->unusable_block_count; + else + unusable = f2fs_get_unusable_blocks(sbi); + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)unusable); +} + + static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -440,6 +454,7 @@ F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); +F2FS_GENERAL_RO_ATTR(unusable); #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -495,6 +510,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(dirty_segments), + ATTR_LIST(unusable), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), From c49633cc68fa00eebc1c199474364b8305e4caf2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 4 Jun 2019 22:59:04 -0700 Subject: [PATCH 15/47] f2fs: separate f2fs i_flags from fs_flags and ext4 i_flags f2fs copied all the on-disk i_flags from ext4, and along with it the assumption that the on-disk i_flags are the same as the bits used by FS_IOC_GETFLAGS and FS_IOC_SETFLAGS. This is problematic because reserving an on-disk inode flag in either filesystem's i_flags or in these ioctls effectively reserves it in all the other places too. In fact, most of the "f2fs i_flags" are not used by f2fs at all. Fix this by separating f2fs's i_flags from the ioctl bits and ext4's i_flags. In the process, un-reserve all "f2fs i_flags" that aren't actually supported by f2fs. This included various flags that were not settable at all, as well as various flags that were settable by FS_IOC_SETFLAGS but didn't actually do anything. There's a slight chance we'll need to add some flag(s) back to FS_IOC_SETFLAGS in order to avoid breaking users who expect f2fs to accept some random flag(s). But hopefully such users don't exist. Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 42 +-------- fs/f2fs/file.c | 237 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 162 insertions(+), 117 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index edc848152553..de32093afc96 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2335,57 +2335,23 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) } /* - * Inode flags + * On-disk inode flags (f2fs_inode::i_flags) */ -#define F2FS_SECRM_FL 0x00000001 /* Secure deletion */ -#define F2FS_UNRM_FL 0x00000002 /* Undelete */ -#define F2FS_COMPR_FL 0x00000004 /* Compress file */ #define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */ #define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ #define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */ #define F2FS_NODUMP_FL 0x00000040 /* do not dump file */ #define F2FS_NOATIME_FL 0x00000080 /* do not update atime */ -/* Reserved for compression usage... */ -#define F2FS_DIRTY_FL 0x00000100 -#define F2FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define F2FS_NOCOMPR_FL 0x00000400 /* Don't compress */ -#define F2FS_ENCRYPT_FL 0x00000800 /* encrypted file */ -/* End compression flags --- maybe not all used */ #define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define F2FS_IMAGIC_FL 0x00002000 /* AFS directory */ -#define F2FS_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ -#define F2FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define F2FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define F2FS_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ -#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */ -#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ -#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ -#define F2FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ -#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ - -#define F2FS_FL_USER_VISIBLE 0x30CBDFFF /* User visible flags */ -#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */ - -/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */ -#define F2FS_FL_XFLAG_VISIBLE (F2FS_SYNC_FL | \ - F2FS_IMMUTABLE_FL | \ - F2FS_APPEND_FL | \ - F2FS_NODUMP_FL | \ - F2FS_NOATIME_FL | \ - F2FS_PROJINHERIT_FL) /* Flags that should be inherited by new inodes from their parent. */ -#define F2FS_FL_INHERITED (F2FS_SECRM_FL | F2FS_UNRM_FL | F2FS_COMPR_FL |\ - F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL |\ - F2FS_NOCOMPR_FL | F2FS_JOURNAL_DATA_FL |\ - F2FS_NOTAIL_FL | F2FS_DIRSYNC_FL |\ - F2FS_PROJINHERIT_FL) +#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ + F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_TOPDIR_FL)) +#define F2FS_REG_FLMASK (~F2FS_DIRSYNC_FL) /* Flags that are appropriate for non-directories/regular files. */ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 45b45f37d347..efdafa886510 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -707,11 +707,9 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, stat->btime.tv_nsec = fi->i_crtime.tv_nsec; } - flags = fi->i_flags & F2FS_FL_USER_VISIBLE; + flags = fi->i_flags; if (flags & F2FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; - if (flags & F2FS_COMPR_FL) - stat->attributes |= STATX_ATTR_COMPRESSED; if (IS_ENCRYPTED(inode)) stat->attributes |= STATX_ATTR_ENCRYPTED; if (flags & F2FS_IMMUTABLE_FL) @@ -720,7 +718,6 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= (STATX_ATTR_APPEND | - STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); @@ -1648,44 +1645,22 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) return 0; } -static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) -{ - struct inode *inode = file_inode(filp); - struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags = fi->i_flags; - - if (IS_ENCRYPTED(inode)) - flags |= F2FS_ENCRYPT_FL; - if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) - flags |= F2FS_INLINE_DATA_FL; - if (is_inode_flag_set(inode, FI_PIN_FILE)) - flags |= F2FS_NOCOW_FL; - - flags &= F2FS_FL_USER_VISIBLE; - - return put_user(flags, (int __user *)arg); -} - -static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) +static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int oldflags; + u32 oldflags; /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - flags = f2fs_mask_flags(inode->i_mode, flags); - oldflags = fi->i_flags; - if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) + if ((iflags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - flags = flags & F2FS_FL_USER_MODIFIABLE; - flags |= oldflags & ~F2FS_FL_USER_MODIFIABLE; - fi->i_flags = flags; + fi->i_flags = iflags | (oldflags & ~mask); if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); @@ -1698,26 +1673,124 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) return 0; } +/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */ + +/* + * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry + * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to + * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add + * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. + */ + +static const struct { + u32 iflag; + u32 fsflag; +} f2fs_fsflags_map[] = { + { F2FS_SYNC_FL, FS_SYNC_FL }, + { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, + { F2FS_APPEND_FL, FS_APPEND_FL }, + { F2FS_NODUMP_FL, FS_NODUMP_FL }, + { F2FS_NOATIME_FL, FS_NOATIME_FL }, + { F2FS_INDEX_FL, FS_INDEX_FL }, + { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, + { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, +}; + +#define F2FS_GETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_INDEX_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL | \ + FS_ENCRYPT_FL | \ + FS_INLINE_DATA_FL | \ + FS_NOCOW_FL) + +#define F2FS_SETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL) + +/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ +static inline u32 f2fs_iflags_to_fsflags(u32 iflags) +{ + u32 fsflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (iflags & f2fs_fsflags_map[i].iflag) + fsflags |= f2fs_fsflags_map[i].fsflag; + + return fsflags; +} + +/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ +static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) +{ + u32 iflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (fsflags & f2fs_fsflags_map[i].fsflag) + iflags |= f2fs_fsflags_map[i].iflag; + + return iflags; +} + +static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); + + if (IS_ENCRYPTED(inode)) + fsflags |= FS_ENCRYPT_FL; + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) + fsflags |= FS_INLINE_DATA_FL; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + fsflags |= FS_NOCOW_FL; + + fsflags &= F2FS_GETTABLE_FS_FL; + + return put_user(fsflags, (int __user *)arg); +} + static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - unsigned int flags; + u32 fsflags; + u32 iflags; int ret; if (!inode_owner_or_capable(inode)) return -EACCES; - if (get_user(flags, (int __user *)arg)) + if (get_user(fsflags, (int __user *)arg)) return -EFAULT; + if (fsflags & ~F2FS_GETTABLE_FS_FL) + return -EOPNOTSUPP; + fsflags &= F2FS_SETTABLE_FS_FL; + + iflags = f2fs_fsflags_to_iflags(fsflags); + if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; inode_lock(inode); - ret = __f2fs_ioc_setflags(inode, flags); - + ret = f2fs_setflags_common(inode, iflags, + f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL)); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -2727,47 +2800,56 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) } #endif -/* Transfer internal flags to xflags */ -static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags) -{ - __u32 xflags = 0; +/* FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR support */ + +/* + * To make a new on-disk f2fs i_flag gettable via FS_IOC_FSGETXATTR and settable + * via FS_IOC_FSSETXATTR, add an entry for it to f2fs_xflags_map[], and add its + * FS_XFLAG_* equivalent to F2FS_SUPPORTED_XFLAGS. + */ + +static const struct { + u32 iflag; + u32 xflag; +} f2fs_xflags_map[] = { + { F2FS_SYNC_FL, FS_XFLAG_SYNC }, + { F2FS_IMMUTABLE_FL, FS_XFLAG_IMMUTABLE }, + { F2FS_APPEND_FL, FS_XFLAG_APPEND }, + { F2FS_NODUMP_FL, FS_XFLAG_NODUMP }, + { F2FS_NOATIME_FL, FS_XFLAG_NOATIME }, + { F2FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT }, +}; + +#define F2FS_SUPPORTED_XFLAGS ( \ + FS_XFLAG_SYNC | \ + FS_XFLAG_IMMUTABLE | \ + FS_XFLAG_APPEND | \ + FS_XFLAG_NODUMP | \ + FS_XFLAG_NOATIME | \ + FS_XFLAG_PROJINHERIT) + +/* Convert f2fs on-disk i_flags to FS_IOC_FS{GET,SET}XATTR flags */ +static inline u32 f2fs_iflags_to_xflags(u32 iflags) +{ + u32 xflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++) + if (iflags & f2fs_xflags_map[i].iflag) + xflags |= f2fs_xflags_map[i].xflag; - if (iflags & F2FS_SYNC_FL) - xflags |= FS_XFLAG_SYNC; - if (iflags & F2FS_IMMUTABLE_FL) - xflags |= FS_XFLAG_IMMUTABLE; - if (iflags & F2FS_APPEND_FL) - xflags |= FS_XFLAG_APPEND; - if (iflags & F2FS_NODUMP_FL) - xflags |= FS_XFLAG_NODUMP; - if (iflags & F2FS_NOATIME_FL) - xflags |= FS_XFLAG_NOATIME; - if (iflags & F2FS_PROJINHERIT_FL) - xflags |= FS_XFLAG_PROJINHERIT; return xflags; } -#define F2FS_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ - FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) - -/* Transfer xflags flags to internal */ -static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags) +/* Convert FS_IOC_FS{GET,SET}XATTR flags to f2fs on-disk i_flags */ +static inline u32 f2fs_xflags_to_iflags(u32 xflags) { - unsigned long iflags = 0; + u32 iflags = 0; + int i; - if (xflags & FS_XFLAG_SYNC) - iflags |= F2FS_SYNC_FL; - if (xflags & FS_XFLAG_IMMUTABLE) - iflags |= F2FS_IMMUTABLE_FL; - if (xflags & FS_XFLAG_APPEND) - iflags |= F2FS_APPEND_FL; - if (xflags & FS_XFLAG_NODUMP) - iflags |= F2FS_NODUMP_FL; - if (xflags & FS_XFLAG_NOATIME) - iflags |= F2FS_NOATIME_FL; - if (xflags & FS_XFLAG_PROJINHERIT) - iflags |= F2FS_PROJINHERIT_FL; + for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++) + if (xflags & f2fs_xflags_map[i].xflag) + iflags |= f2fs_xflags_map[i].iflag; return iflags; } @@ -2779,8 +2861,7 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) struct fsxattr fa; memset(&fa, 0, sizeof(struct fsxattr)); - fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags & - F2FS_FL_USER_VISIBLE); + fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags); if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, @@ -2818,9 +2899,8 @@ static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa) static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct f2fs_inode_info *fi = F2FS_I(inode); struct fsxattr fa; - unsigned int flags; + u32 iflags; int err; if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa))) @@ -2830,11 +2910,11 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) if (!inode_owner_or_capable(inode)) return -EACCES; - if (fa.fsx_xflags & ~F2FS_SUPPORTED_FS_XFLAGS) + if (fa.fsx_xflags & ~F2FS_SUPPORTED_XFLAGS) return -EOPNOTSUPP; - flags = f2fs_xflags_to_iflags(fa.fsx_xflags); - if (f2fs_mask_flags(inode->i_mode, flags) != flags) + iflags = f2fs_xflags_to_iflags(fa.fsx_xflags); + if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) return -EOPNOTSUPP; err = mnt_want_write_file(filp); @@ -2845,9 +2925,8 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) err = f2fs_ioctl_check_project(inode, &fa); if (err) goto out; - flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) | - (flags & F2FS_FL_XFLAG_VISIBLE); - err = __f2fs_ioc_setflags(inode, flags); + err = f2fs_setflags_common(inode, iflags, + f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS)); if (err) goto out; From a5f935a02f067456c553c66a466e1a3016c5e2c4 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 13 Jun 2019 16:29:53 +0900 Subject: [PATCH 16/47] f2fs: only set project inherit bit for directory It doesn't make any sense to have project inherit bits for regular files, even though this won't cause any problem, but it is better fix this. Cc: Andreas Dilger Signed-off-by: Wang Shilong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index de32093afc96..41cfada321a8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2351,7 +2351,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define F2FS_REG_FLMASK (~F2FS_DIRSYNC_FL) +#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)) /* Flags that are appropriate for non-directories/regular files. */ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ccb02226dd2c..8838e55e7416 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -343,6 +343,8 @@ static int do_read_inode(struct inode *inode) le16_to_cpu(ri->i_gc_failures); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); + if (S_ISREG(inode->i_mode)) + fi->i_flags &= ~F2FS_PROJINHERIT_FL; fi->flags = 0; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); From 0177d8f11604b8e190b4b781e881b4167e2ad7ea Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Wed, 5 Jun 2019 11:33:25 +0800 Subject: [PATCH 17/47] f2fs: ioctl for removing a range from F2FS This ioctl shrinks a given length (aligned to sections) from end of the main area. Any cursegs and valid blocks will be moved out before invalidating the range. This feature can be used for adjusting partition sizes online. History of the patch: Sahitya Tummala: - Add this ioctl for f2fs_compat_ioctl() as well. - Fix debugfs status to reflect the online resize changes. - Fix potential race between online resize path and allocate new data block path or gc path. Others: - Rename some identifiers. - Add some error handling branches. - Clear sbi->next_victim_seg[BG_GC/FG_GC] in shrinking range. - Implement this interface as ext4's, and change the parameter from shrunk bytes to new block count of F2FS. - During resizing, force to empty sit_journal and forbid adding new entries to it, in order to avoid invalid segno in journal after resize. - Reduce sbi->user_block_count before resize starts. - Commit the updated superblock first, and then update in-memory metadata only when the former succeeds. - Target block count must align to sections. - Write checkpoint before and after committing the new superblock, w/o CP_FSCK_FLAG respectively, so that the FS can be fixed by fsck even if resize fails after the new superblock is committed. - In free_segment_range(), reduce granularity of gc_mutex. - Add protection on curseg migration. - Add freeze_bdev() and thaw_bdev() for resize fs. - Remove CUR_MAIN_SECS and use MAIN_SECS directly for allocation. - Recover super_block and FS metadata when resize fails. - No need to clear CP_FSCK_FLAG in update_ckpt_flags(). - Clean up the sb and fs metadata update functions for resize_fs. Geert Uytterhoeven: - Use div_u64*() for 64-bit divisions Arnd Bergmann: - Not all architectures support get_user() with a 64-bit argument: ERROR: "__get_user_bad" [fs/f2fs/f2fs.ko] undefined! Use copy_from_user() here, this will always work. Signed-off-by: Qiuyang Sun Signed-off-by: Chao Yu Signed-off-by: Sahitya Tummala Signed-off-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +- fs/f2fs/debug.c | 7 ++ fs/f2fs/f2fs.h | 6 ++ fs/f2fs/file.c | 24 ++++++ fs/f2fs/gc.c | 182 ++++++++++++++++++++++++++++++++++++++++++- fs/f2fs/segment.c | 39 +++++++++- fs/f2fs/super.c | 4 + 7 files changed, 260 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 89825261d474..7b4a0fd59512 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1302,7 +1302,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || + is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 99e9a5c37b71..7706049d23bf 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -27,8 +27,15 @@ static DEFINE_MUTEX(f2fs_stat_mutex); static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); int i; + /* these will be changed if online resize is done */ + si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); + si->main_area_sections = le32_to_cpu(raw_super->section_count); + si->main_area_zones = si->main_area_sections / + le32_to_cpu(raw_super->secs_per_zone); + /* validation check of the segment numbers */ si->hit_largest = atomic64_read(&sbi->read_hit_largest); si->hit_cached = atomic64_read(&sbi->read_hit_cached); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 41cfada321a8..646ec40495b0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -415,6 +415,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) +#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -1116,6 +1117,7 @@ enum { SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ + SBI_IS_RESIZEFS, /* resizefs is in process */ }; enum { @@ -1236,6 +1238,7 @@ struct f2fs_sb_info { unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ + struct mutex resize_mutex; /* for resize exclusion */ unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ @@ -3059,6 +3062,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, @@ -3197,6 +3202,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, unsigned int segno); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); /* * recovery.c diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index efdafa886510..74600c4205da 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3059,6 +3059,27 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) return f2fs_precache_extents(file_inode(filp)); } +static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); + __u64 block_count; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&block_count, (void __user *)arg, + sizeof(block_count))) + return -EFAULT; + + ret = f2fs_resize_fs(sbi, block_count); + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3115,6 +3136,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_set_pin_file(filp, arg); case F2FS_IOC_PRECACHE_EXTENTS: return f2fs_ioc_precache_extents(filp, arg); + case F2FS_IOC_RESIZE_FS: + return f2fs_ioc_resize_fs(filp, arg); default: return -ENOTTY; } @@ -3228,6 +3251,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: + case F2FS_IOC_RESIZE_FS: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1e029da26053..e19b49b02d1b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -311,10 +311,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, struct sit_info *sm = SIT_I(sbi); struct victim_sel_policy p; unsigned int secno, last_victim; - unsigned int last_segment = MAIN_SEGS(sbi); + unsigned int last_segment; unsigned int nsearched = 0; mutex_lock(&dirty_i->seglist_lock); + last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; p.alloc_mode = alloc_mode; select_policy(sbi, gc_type, type, &p); @@ -405,7 +406,8 @@ next: sm->last_victim[p.gc_mode] = last_victim + 1; else sm->last_victim[p.gc_mode] = segno + 1; - sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); + sm->last_victim[p.gc_mode] %= + (MAIN_SECS(sbi) * sbi->segs_per_sec); break; } } @@ -1361,3 +1363,179 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } + +static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, + unsigned int end) +{ + int type; + unsigned int segno, next_inuse; + int err = 0; + + /* Move out cursegs from the target range */ + for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) + allocate_segment_for_resize(sbi, type, start, end); + + /* do GC to move out valid blocks in the range */ + for (segno = start; segno <= end; segno += sbi->segs_per_sec) { + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), + }; + + mutex_lock(&sbi->gc_mutex); + do_garbage_collect(sbi, segno, &gc_list, FG_GC); + mutex_unlock(&sbi->gc_mutex); + put_gc_inode(&gc_list); + + if (get_valid_blocks(sbi, segno, true)) + return -EAGAIN; + } + + err = f2fs_sync_fs(sbi->sb, 1); + if (err) + return err; + + next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); + if (next_inuse <= end) { + f2fs_msg(sbi->sb, KERN_ERR, + "segno %u should be free but still inuse!", next_inuse); + f2fs_bug_on(sbi, 1); + } + return err; +} + +static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) +{ + struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi); + int section_count = le32_to_cpu(raw_sb->section_count); + int segment_count = le32_to_cpu(raw_sb->segment_count); + int segment_count_main = le32_to_cpu(raw_sb->segment_count_main); + long long block_count = le64_to_cpu(raw_sb->block_count); + int segs = secs * sbi->segs_per_sec; + + raw_sb->section_count = cpu_to_le32(section_count + secs); + raw_sb->segment_count = cpu_to_le32(segment_count + segs); + raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); + raw_sb->block_count = cpu_to_le64(block_count + + (long long)segs * sbi->blocks_per_seg); +} + +static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) +{ + int segs = secs * sbi->segs_per_sec; + long long user_block_count = + le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); + + SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; + MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; + FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; + FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; + F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + + (long long)segs * sbi->blocks_per_seg); +} + +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) +{ + __u64 old_block_count, shrunk_blocks; + unsigned int secs; + int gc_mode, gc_type; + int err = 0; + __u32 rem; + + old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count); + if (block_count > old_block_count) + return -EINVAL; + + /* new fs size should align to section size */ + div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); + if (rem) + return -EINVAL; + + if (block_count == old_block_count) + return 0; + + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Should run fsck to repair first."); + return -EINVAL; + } + + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Checkpoint should be enabled."); + return -EINVAL; + } + + freeze_bdev(sbi->sb->s_bdev); + + shrunk_blocks = old_block_count - block_count; + secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); + spin_lock(&sbi->stat_lock); + if (shrunk_blocks + valid_user_blocks(sbi) + + sbi->current_reserved_blocks + sbi->unusable_block_count + + F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count) + err = -ENOSPC; + else + sbi->user_block_count -= shrunk_blocks; + spin_unlock(&sbi->stat_lock); + if (err) { + thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; + } + + mutex_lock(&sbi->resize_mutex); + set_sbi_flag(sbi, SBI_IS_RESIZEFS); + + mutex_lock(&DIRTY_I(sbi)->seglist_lock); + + MAIN_SECS(sbi) -= secs; + + for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) + if (SIT_I(sbi)->last_victim[gc_mode] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + SIT_I(sbi)->last_victim[gc_mode] = 0; + + for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) + if (sbi->next_victim_seg[gc_type] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + sbi->next_victim_seg[gc_type] = NULL_SEGNO; + + mutex_unlock(&DIRTY_I(sbi)->seglist_lock); + + err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec, + MAIN_SEGS(sbi) - 1); + if (err) + goto out; + + update_sb_metadata(sbi, -secs); + + err = f2fs_commit_super(sbi, false); + if (err) { + update_sb_metadata(sbi, secs); + goto out; + } + + update_fs_metadata(sbi, -secs); + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + err = f2fs_sync_fs(sbi->sb, 1); + if (err) { + update_fs_metadata(sbi, secs); + update_sb_metadata(sbi, secs); + f2fs_commit_super(sbi, false); + } +out: + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_ERR, + "resize_fs failed, should run fsck to repair!"); + + MAIN_SECS(sbi) += secs; + spin_lock(&sbi->stat_lock); + sbi->user_block_count += shrunk_blocks; + spin_unlock(&sbi->stat_lock); + } + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + mutex_unlock(&sbi->resize_mutex); + thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; +} diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 54a3e398d1ea..198b2e68a487 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2657,6 +2657,40 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno; + + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); + down_write(&SIT_I(sbi)->sentry_lock); + + segno = CURSEG_I(sbi, type)->segno; + if (segno < start || segno > end) + goto unlock; + + if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type); + else + new_curseg(sbi, type, true); + + stat_inc_seg_type(sbi, curseg); + + locate_dirty_segment(sbi, segno); +unlock: + up_write(&SIT_I(sbi)->sentry_lock); + + if (segno != curseg->segno) + f2fs_msg(sbi->sb, KERN_NOTICE, + "For resize: curseg of type %d: %u ==> %u", + type, segno, curseg->segno); + + mutex_unlock(&curseg->curseg_mutex); + up_read(&SM_I(sbi)->curseg_lock); +} + void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { struct curseg_info *curseg; @@ -3786,7 +3820,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_journal *journal = curseg->journal; struct sit_entry_set *ses, *tmp; struct list_head *head = &SM_I(sbi)->sit_entry_set; - bool to_journal = true; + bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS); struct seg_entry *se; down_write(&sit_i->sentry_lock); @@ -3805,7 +3839,8 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and add and account * them in sit entry set. */ - if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL)) + if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) || + !to_journal) remove_sits_in_journal(sbi); /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 662ceeb4152f..f74923102ed0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3223,6 +3223,7 @@ try_onemore: mutex_init(&sbi->gc_mutex); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); + mutex_init(&sbi->resize_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); @@ -3295,6 +3296,9 @@ try_onemore: sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL; } + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_NEED_FSCK); + /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) { From 26bc778b21a38a50b3a908e08fd3a3d5ace7e137 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Jun 2019 18:00:09 +0800 Subject: [PATCH 18/47] f2fs: avoid get_valid_blocks() for cleanup No logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index fa184880cff3..50f38054b648 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -582,8 +582,7 @@ static int __maybe_unused segment_info_seq_show(struct seq_file *seq, if ((i % 10) == 0) seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u", se->type, - get_valid_blocks(sbi, i, false)); + seq_printf(seq, "%d|%-3u", se->type, se->valid_blocks); if ((i % 10) == 9 || i == (total_segs - 1)) seq_putc(seq, '\n'); else @@ -609,8 +608,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, struct seg_entry *se = get_seg_entry(sbi, i); seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u|", se->type, - get_valid_blocks(sbi, i, false)); + seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); seq_putc(seq, '\n'); From ae8ba98776eb959dab985fdf48d07aca6a40db4c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 18 Jun 2019 17:48:42 +0800 Subject: [PATCH 19/47] f2fs: introduce f2fs_ macros to wrap f2fs_printk() - Add and use f2fs_ macros - Convert f2fs_msg to f2fs_printk - Remove level from f2fs_printk and embed the level in the format - Coalesce formats and align multi-line arguments - Remove unnecessary duplicate extern f2fs_msg f2fs.h Signed-off-by: Joe Perches Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 34 ++- fs/f2fs/dir.c | 10 +- fs/f2fs/extent_cache.c | 7 +- fs/f2fs/f2fs.h | 38 ++-- fs/f2fs/file.c | 21 +- fs/f2fs/gc.c | 23 +- fs/f2fs/inline.c | 12 +- fs/f2fs/inode.c | 72 +++--- fs/f2fs/namei.c | 10 +- fs/f2fs/node.c | 30 +-- fs/f2fs/recovery.c | 37 ++-- fs/f2fs/segment.c | 61 +++-- fs/f2fs/segment.h | 10 +- fs/f2fs/super.c | 491 +++++++++++++++++------------------------ 14 files changed, 350 insertions(+), 506 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7b4a0fd59512..a5ae1ef1a6d0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -146,8 +146,8 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, exist = f2fs_test_bit(offset, se->cur_valid_map); if (!exist && type == DATA_GENERIC_ENHANCE) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "blkaddr:%u, sit bitmap:%d", blkaddr, exist); + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); set_sbi_flag(sbi, SBI_NEED_FSCK); WARN_ON(1); } @@ -184,8 +184,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case DATA_GENERIC_ENHANCE_READ: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) { - f2fs_msg(sbi->sb, KERN_WARNING, - "access invalid blkaddr:%u", blkaddr); + f2fs_warn(sbi, "access invalid blkaddr:%u", + blkaddr); set_sbi_flag(sbi, SBI_NEED_FSCK); WARN_ON(1); return false; @@ -657,9 +657,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) err_out: set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); + f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); return err; } @@ -676,13 +675,12 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) return 0; if (bdev_read_only(sbi->sb->s_bdev)) { - f2fs_msg(sbi->sb, KERN_INFO, "write access " - "unavailable, skipping orphan cleanup"); + f2fs_info(sbi, "write access unavailable, skipping orphan cleanup"); return 0; } if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + f2fs_info(sbi, "orphan cleanup on readonly fs"); sbi->sb->s_flags &= ~SB_RDONLY; } @@ -827,15 +825,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, if (crc_offset < CP_MIN_CHKSUM_OFFSET || crc_offset > CP_CHKSUM_OFFSET) { f2fs_put_page(*cp_page, 1); - f2fs_msg(sbi->sb, KERN_WARNING, - "invalid crc_offset: %zu", crc_offset); + f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } crc = f2fs_checkpoint_chksum(sbi, *cp_block); if (crc != cur_cp_crc(*cp_block)) { f2fs_put_page(*cp_page, 1); - f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); + f2fs_warn(sbi, "invalid crc value"); return -EINVAL; } @@ -858,9 +855,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (le32_to_cpu(cp_block->cp_pack_total_block_count) > sbi->blocks_per_seg) { - f2fs_msg(sbi->sb, KERN_WARNING, - "invalid cp_pack_total_block_count:%u", - le32_to_cpu(cp_block->cp_pack_total_block_count)); + f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", + le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; } pre_version = *version; @@ -1559,8 +1555,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (cpc->reason != CP_PAUSE) return 0; - f2fs_msg(sbi->sb, KERN_WARNING, - "Start checkpoint disabled!"); + f2fs_warn(sbi, "Start checkpoint disabled!"); } mutex_lock(&sbi->cp_mutex); @@ -1626,8 +1621,7 @@ stop: stat_inc_cp_count(sbi->stat_info); if (cpc->reason & CP_RECOVERY) - f2fs_msg(sbi->sb, KERN_NOTICE, - "checkpoint: version = %llx", ckpt_ver); + f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); /* do checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 32ca1602c898..051370d1fdff 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -218,9 +218,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { - f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING, - "Corrupted max_depth of %lu: %u", - dir->i_ino, max_depth); + f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u", + dir->i_ino, max_depth); max_depth = MAX_DIR_HASH_DEPTH; f2fs_i_depth_write(dir, max_depth); } @@ -816,9 +815,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); if (unlikely(bit_pos > d->max || le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted namelen=%d, run fsck to fix.", - __func__, le16_to_cpu(de->name_len)); + f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.", + __func__, le16_to_cpu(de->name_len)); set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EINVAL; goto out; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index caf77fe8ac07..e60078460ad1 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -184,10 +184,9 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, next_re = rb_entry(next, struct rb_entry, rb_node); if (cur_re->ofs + cur_re->len > next_re->ofs) { - f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, " - "cur(%u, %u) next(%u, %u)", - cur_re->ofs, cur_re->len, - next_re->ofs, next_re->len); + f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)", + cur_re->ofs, cur_re->len, + next_re->ofs, next_re->len); return false; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 646ec40495b0..cc0d5693989c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1809,7 +1809,20 @@ enospc: return -ENOSPC; } -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); +__printf(2, 3) +void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...); + +#define f2fs_err(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_notice(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__) +#define f2fs_info(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__) +#define f2fs_debug(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__) + static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) @@ -1825,11 +1838,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); if (unlikely(inode->i_blocks < sectors)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", - inode->i_ino, - (unsigned long long)inode->i_blocks, - (unsigned long long)sectors); + f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks, + (unsigned long long)sectors); set_sbi_flag(sbi, SBI_NEED_FSCK); return; } @@ -2067,10 +2079,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, dquot_free_inode(inode); } else { if (unlikely(inode->i_blocks == 0)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, - (unsigned long long)inode->i_blocks); + f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); return; } @@ -2840,9 +2851,8 @@ static inline void verify_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { - f2fs_msg(sbi->sb, KERN_ERR, - "invalid blkaddr: %u, type: %d, run fsck to fix.", - blkaddr, type); + f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.", + blkaddr, type); f2fs_bug_on(sbi, 1); } } @@ -2973,8 +2983,6 @@ int f2fs_quota_sync(struct super_block *sb, int type); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); -extern __printf(3, 4) -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 74600c4205da..1331cae6b5c2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1837,9 +1837,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) * f2fs_is_atomic_file. */ if (get_dirty_pages(inode)) - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Unexpected flush for atomic writes: ino=%lu, npages=%u", - inode->i_ino, get_dirty_pages(inode)); + f2fs_warn(F2FS_I_SB(inode), "Unexpected flush for atomic writes: ino=%lu, npages=%u", + inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2274,8 +2273,7 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) return -EROFS; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - f2fs_msg(sbi->sb, KERN_INFO, - "Skipping Checkpoint. Checkpoints currently disabled."); + f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled."); return -EINVAL; } @@ -2660,10 +2658,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || __is_large_section(sbi)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Can't flush %u in %d for segs_per_sec %u != 1", - range.dev_num, sbi->s_ndevs, - sbi->segs_per_sec); + f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1", + range.dev_num, sbi->s_ndevs, sbi->segs_per_sec); return -EINVAL; } @@ -2948,10 +2944,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) fi->i_gc_failures[GC_FAILURE_PIN] + 1); if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: Enable GC = ino %lx after %x GC trials", - __func__, inode->i_ino, - fi->i_gc_failures[GC_FAILURE_PIN]); + f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", + __func__, inode->i_ino, + fi->i_gc_failures[GC_FAILURE_PIN]); clear_inode_flag(inode, FI_PIN_FILE); return -EAGAIN; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e19b49b02d1b..c65f87f11de0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -618,9 +618,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } if (sum->version != dni->version) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: valid data with mismatched node version.", - __func__); + f2fs_warn(sbi, "%s: valid data with mismatched node version.", + __func__); set_sbi_flag(sbi, SBI_NEED_FSCK); } @@ -1183,9 +1182,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, sum = page_address(sum_page); if (type != GET_SUM_TYPE((&sum->footer))) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) " - "type [%d, %d] in SSA and SIT", - segno, type, GET_SUM_TYPE((&sum->footer))); + f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", + segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_stop_checkpoint(sbi, false); goto skip; @@ -1397,8 +1395,8 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); if (next_inuse <= end) { - f2fs_msg(sbi->sb, KERN_ERR, - "segno %u should be free but still inuse!", next_inuse); + f2fs_err(sbi, "segno %u should be free but still inuse!", + next_inuse); f2fs_bug_on(sbi, 1); } return err; @@ -1455,14 +1453,12 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) return 0; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Should run fsck to repair first."); + f2fs_err(sbi, "Should run fsck to repair first."); return -EINVAL; } if (test_opt(sbi, DISABLE_CHECKPOINT)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Checkpoint should be enabled."); + f2fs_err(sbi, "Checkpoint should be enabled."); return -EINVAL; } @@ -1526,8 +1522,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) out: if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_ERR, - "resize_fs failed, should run fsck to repair!"); + f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); MAIN_SECS(sbi) += secs; spin_lock(&sbi->stat_lock); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a2a69712b3ed..2e4913a34e14 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -140,10 +140,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (unlikely(dn->data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(dn); set_sbi_flag(fio.sbi, SBI_NEED_FSCK); - f2fs_msg(fio.sbi->sb, KERN_WARNING, - "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " - "run fsck to fix.", - __func__, dn->inode->i_ino, dn->data_blkaddr); + f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + __func__, dn->inode->i_ino, dn->data_blkaddr); return -EINVAL; } @@ -383,10 +381,8 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, if (unlikely(dn.data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(&dn); set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); - f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING, - "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " - "run fsck to fix.", - __func__, dir->i_ino, dn.data_blkaddr); + f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + __func__, dir->i_ino, dn.data_blkaddr); err = -EINVAL; goto out; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 8838e55e7416..9ea3aedb8213 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -176,9 +176,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) calculated = f2fs_inode_chksum(sbi, page); if (provided != calculated) - f2fs_msg(sbi->sb, KERN_WARNING, - "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - page->index, ino_of_node(page), provided, calculated); + f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", + page->index, ino_of_node(page), provided, calculated); return provided == calculated; } @@ -202,50 +201,41 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); if (!iblocks) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, " - "run fsck to fix.", - __func__, inode->i_ino, iblocks); + f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", + __func__, inode->i_ino, iblocks); return false; } if (ino_of_node(node_page) != nid_of_node(node_page)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted inode footer i_ino=%lx, ino,nid: " - "[%u, %u] run fsck to fix.", - __func__, inode->i_ino, - ino_of_node(node_page), nid_of_node(node_page)); + f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", + __func__, inode->i_ino, + ino_of_node(node_page), nid_of_node(node_page)); return false; } if (f2fs_sb_has_flexible_inline_xattr(sbi) && !f2fs_has_extra_attr(inode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted inode ino=%lx, run fsck to fix.", - __func__, inode->i_ino); + f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", + __func__, inode->i_ino); return false; } if (f2fs_has_extra_attr(inode) && !f2fs_sb_has_extra_attr(sbi)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) is with extra_attr, " - "but extra_attr feature is off", - __func__, inode->i_ino); + f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", + __func__, inode->i_ino); return false; } if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || fi->i_extra_isize % sizeof(__le32)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, " - "max: %zu", - __func__, inode->i_ino, fi->i_extra_isize, - F2FS_TOTAL_EXTRA_ATTR_SIZE); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu", + __func__, inode->i_ino, fi->i_extra_isize, + F2FS_TOTAL_EXTRA_ATTR_SIZE); return false; } @@ -255,11 +245,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) (!fi->i_inline_xattr_size || fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) has corrupted " - "i_inline_xattr_size: %d, max: %zu", - __func__, inode->i_ino, fi->i_inline_xattr_size, - MAX_INLINE_XATTR_SIZE); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MAX_INLINE_XATTR_SIZE); return false; } @@ -272,11 +260,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, DATA_GENERIC_ENHANCE))) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) extent info [%u, %u, %u] " - "is incorrect, run fsck to fix", - __func__, inode->i_ino, - ei->blk, ei->fofs, ei->len); + f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", + __func__, inode->i_ino, + ei->blk, ei->fofs, ei->len); return false; } } @@ -284,19 +270,15 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (f2fs_has_inline_data(inode) && (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx, mode=%u) should not have " - "inline_data, run fsck to fix", - __func__, inode->i_ino, inode->i_mode); + f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); return false; } if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx, mode=%u) should not have " - "inline_dentry, run fsck to fix", - __func__, inode->i_ino, inode->i_mode); + f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); return false; } @@ -785,8 +767,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_get_node_info(sbi, inode->i_ino, &ni); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "May loss orphan inode, run fsck to fix."); + f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); goto out; } @@ -794,8 +775,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_acquire_orphan_inode(sbi); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "Too many orphan inodes, run fsck to fix."); + f2fs_warn(sbi, "Too many orphan inodes, run fsck to fix."); } else { f2fs_add_orphan_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2fdcbe923a32..0bdc64994caf 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -385,9 +385,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) int err = 0; if (f2fs_readonly(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_INFO, - "skip recovering inline_dots inode (ino:%lu, pino:%u) " - "in readonly mountpoint", dir->i_ino, pino); + f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", + dir->i_ino, pino); return 0; } @@ -480,9 +479,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { - f2fs_msg(inode->i_sb, KERN_WARNING, - "Inconsistent encryption contexts: %lu/%lu", - dir->i_ino, inode->i_ino); + f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu", + dir->i_ino, inode->i_ino); err = -EPERM; goto out_iput; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4e3628e96318..4a935a26bf82 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -34,9 +34,8 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) { if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: out-of-range nid=%x, run fsck to fix.", - __func__, nid); + f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", + __func__, nid); return -EINVAL; } return 0; @@ -1187,10 +1186,8 @@ int f2fs_remove_inode_page(struct inode *inode) } if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, - (unsigned long long)inode->i_blocks); + f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); } @@ -1380,11 +1377,10 @@ repeat: } page_hit: if(unlikely(nid != nid_of_node(page))) { - f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, " - "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - nid, nid_of_node(page), ino_of_node(page), - ofs_of_node(page), cpver_of_node(page), - next_blkaddr_of_node(page)); + f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + nid, nid_of_node(page), ino_of_node(page), + ofs_of_node(page), cpver_of_node(page), + next_blkaddr_of_node(page)); err = -EINVAL; out_err: ClearPageUptodate(page); @@ -1752,9 +1748,8 @@ continue_unlock: break; } if (!ret && atomic && !marked) { - f2fs_msg(sbi->sb, KERN_DEBUG, - "Retry to write fsync mark: ino=%u, idx=%lx", - ino, last_page->index); + f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", + ino, last_page->index); lock_page(last_page); f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); @@ -2304,8 +2299,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (ret) { up_read(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, !mount); - f2fs_msg(sbi->sb, KERN_ERR, - "NAT is corrupt, run fsck to fix it"); + f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } } @@ -2915,7 +2909,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nm_i->full_nat_bits = nm_i->nat_bits + 8; nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; - f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint"); + f2fs_notice(sbi, "Found nat_bits in checkpoint"); return 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e04f82b3f4fc..72c2c4ba795f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -188,10 +188,9 @@ out: name = ""; else name = raw_inode->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(ipage), name, - IS_ERR(dir) ? 0 : dir->i_ino, err); + f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", + __func__, ino_of_node(ipage), name, + IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } @@ -292,9 +291,8 @@ static int recover_inode(struct inode *inode, struct page *page) else name = F2FS_INODE(page)->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(page), name, raw->i_inline); + f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", + ino_of_node(page), name, raw->i_inline); return 0; } @@ -371,10 +369,9 @@ next: /* sanity check in order to detect looped node chain */ if (++loop_cnt >= free_blocks || blkaddr == next_blkaddr_of_node(page)) { - f2fs_msg(sbi->sb, KERN_NOTICE, - "%s: detect looped node chain, " - "blkaddr:%u, next:%u", - __func__, blkaddr, next_blkaddr_of_node(page)); + f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u", + __func__, blkaddr, + next_blkaddr_of_node(page)); f2fs_put_page(page, 1); err = -EINVAL; break; @@ -553,10 +550,9 @@ retry_dn: f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); if (ofs_of_node(dn.node_page) != ofs_of_node(page)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", - inode->i_ino, ofs_of_node(dn.node_page), - ofs_of_node(page)); + f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", + inode->i_ino, ofs_of_node(dn.node_page), + ofs_of_node(page)); err = -EFAULT; goto err; } @@ -642,11 +638,9 @@ retry_prev: err: f2fs_put_dnode(&dn); out: - f2fs_msg(sbi->sb, KERN_NOTICE, - "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", - inode->i_ino, - file_keep_isize(inode) ? "keep" : "recover", - recovered, err); + f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", + inode->i_ino, file_keep_isize(inode) ? "keep" : "recover", + recovered, err); return err; } @@ -734,8 +728,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) #endif if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, - "recover fsync data on readonly fs"); + f2fs_info(sbi, "recover fsync data on readonly fs"); sbi->sb->s_flags &= ~SB_RDONLY; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 198b2e68a487..4a6133cc9112 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1757,8 +1757,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, devi = f2fs_target_device_index(sbi, blkstart); if (blkstart < FDEV(devi).start_blk || blkstart > FDEV(devi).end_blk) { - f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x", - blkstart); + f2fs_err(sbi, "Invalid block %x", blkstart); return -EIO; } blkstart -= FDEV(devi).start_blk; @@ -1771,10 +1770,9 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, if (sector & (bdev_zone_sectors(bdev) - 1) || nr_sects != bdev_zone_sectors(bdev)) { - f2fs_msg(sbi->sb, KERN_ERR, - "(%d) %s: Unaligned zone reset attempted (block %x + %x)", - devi, sbi->s_ndevs ? FDEV(devi).path: "", - blkstart, blklen); + f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)", + devi, sbi->s_ndevs ? FDEV(devi).path : "", + blkstart, blklen); return -EIO; } trace_f2fs_issue_reset_zone(bdev, blkstart); @@ -2138,15 +2136,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_set_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks--; del = 0; @@ -2167,15 +2164,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(!exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks++; del = 0; @@ -2683,9 +2679,8 @@ unlock: up_write(&SIT_I(sbi)->sentry_lock); if (segno != curseg->segno) - f2fs_msg(sbi->sb, KERN_NOTICE, - "For resize: curseg of type %d: %u ==> %u", - type, segno, curseg->segno); + f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u", + type, segno, curseg->segno); mutex_unlock(&curseg->curseg_mutex); up_read(&SM_I(sbi)->curseg_lock); @@ -2823,8 +2818,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) goto out; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Found FS corruption, run fsck to fix."); + f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); return -EIO; } @@ -3585,9 +3579,8 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) /* sanity check for summary blocks */ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { - f2fs_msg(sbi->sb, KERN_ERR, - "invalid journal entries nats %u sits %u\n", - nats_in_cursum(nat_j), sits_in_cursum(sit_j)); + f2fs_err(sbi, "invalid journal entries nats %u sits %u\n", + nats_in_cursum(nat_j), sits_in_cursum(sit_j)); return -EINVAL; } @@ -4155,9 +4148,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) start = le32_to_cpu(segno_in_journal(journal, i)); if (start >= MAIN_SEGS(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong journal entry on segno %u", - start); + f2fs_err(sbi, "Wrong journal entry on segno %u", + start); set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EINVAL; break; @@ -4196,9 +4188,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) up_read(&curseg->journal_rwsem); if (!err && total_node_blocks != valid_node_count(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "SIT is corrupted node# %u vs %u", - total_node_blocks, valid_node_count(sbi)); + f2fs_err(sbi, "SIT is corrupted node# %u vs %u", + total_node_blocks, valid_node_count(sbi)); set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EINVAL; } @@ -4314,12 +4305,10 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) if (!f2fs_test_bit(blkofs, se->cur_valid_map)) continue; out: - f2fs_msg(sbi->sb, KERN_ERR, - "Current segment's next free block offset is " - "inconsistent with bitmap, logtype:%u, " - "segno:%u, type:%u, next_blkoff:%u, blkofs:%u", - i, curseg->segno, curseg->alloc_type, - curseg->next_blkoff, blkofs); + f2fs_err(sbi, + "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u", + i, curseg->segno, curseg->alloc_type, + curseg->next_blkoff, blkofs); return -EINVAL; } } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 429007b8036e..166ac0f07a4e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -693,9 +693,8 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, } while (cur_pos < sbi->blocks_per_seg); if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Mismatch valid blocks %d vs. %d", - GET_SIT_VBLOCKS(raw_sit), valid_blocks); + f2fs_err(sbi, "Mismatch valid blocks %d vs. %d", + GET_SIT_VBLOCKS(raw_sit), valid_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); return -EINVAL; } @@ -703,9 +702,8 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, /* check segment usage, and check boundary of a given segment number */ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg || segno > TOTAL_SEGS(sbi) - 1)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid blocks %d or segno %u", - GET_SIT_VBLOCKS(raw_sit), segno); + f2fs_err(sbi, "Wrong valid blocks %d or segno %u", + GET_SIT_VBLOCKS(raw_sit), segno); set_sbi_flag(sbi, SBI_NEED_FSCK); return -EINVAL; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f74923102ed0..b5255f293b25 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -205,15 +205,20 @@ static match_table_t f2fs_tokens = { {Opt_err, NULL}, }; -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) +void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) { struct va_format vaf; va_list args; + int level; va_start(args, fmt); - vaf.fmt = fmt; + + level = printk_get_level(fmt); + vaf.fmt = printk_skip_level(fmt); vaf.va = &args; - printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + va_end(args); } @@ -226,21 +231,19 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) if (test_opt(sbi, RESERVE_ROOT) && F2FS_OPTION(sbi).root_reserved_blocks > limit) { F2FS_OPTION(sbi).root_reserved_blocks = limit; - f2fs_msg(sbi->sb, KERN_INFO, - "Reduce reserved blocks for root = %u", - F2FS_OPTION(sbi).root_reserved_blocks); + f2fs_info(sbi, "Reduce reserved blocks for root = %u", + F2FS_OPTION(sbi).root_reserved_blocks); } if (!test_opt(sbi, RESERVE_ROOT) && (!uid_eq(F2FS_OPTION(sbi).s_resuid, make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || !gid_eq(F2FS_OPTION(sbi).s_resgid, make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) - f2fs_msg(sbi->sb, KERN_INFO, - "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", - from_kuid_munged(&init_user_ns, - F2FS_OPTION(sbi).s_resuid), - from_kgid_munged(&init_user_ns, - F2FS_OPTION(sbi).s_resgid)); + f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); } static void init_once(void *foo) @@ -261,35 +264,29 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, int ret = -EINVAL; if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_msg(sb, KERN_ERR, - "Cannot change journaled " - "quota options when quota turned on"); + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } if (f2fs_sb_has_quota_ino(sbi)) { - f2fs_msg(sb, KERN_INFO, - "QUOTA feature is enabled, so ignore qf_name"); + f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); return 0; } qname = match_strdup(args); if (!qname) { - f2fs_msg(sb, KERN_ERR, - "Not enough memory for storing quotafile name"); + f2fs_err(sbi, "Not enough memory for storing quotafile name"); return -ENOMEM; } if (F2FS_OPTION(sbi).s_qf_names[qtype]) { if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) ret = 0; else - f2fs_msg(sb, KERN_ERR, - "%s quota file already specified", + f2fs_err(sbi, "%s quota file already specified", QTYPE2NAME(qtype)); goto errout; } if (strchr(qname, '/')) { - f2fs_msg(sb, KERN_ERR, - "quotafile must be on filesystem root"); + f2fs_err(sbi, "quotafile must be on filesystem root"); goto errout; } F2FS_OPTION(sbi).s_qf_names[qtype] = qname; @@ -305,8 +302,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) struct f2fs_sb_info *sbi = F2FS_SB(sb); if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" - " when quota turned on"); + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); @@ -322,8 +318,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) * to support legacy quotas in quota files. */ if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " - "Cannot enable project quota enforcement."); + f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement."); return -1; } if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || @@ -343,21 +338,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || test_opt(sbi, PRJQUOTA)) { - f2fs_msg(sbi->sb, KERN_ERR, "old and new quota " - "format mixing"); + f2fs_err(sbi, "old and new quota format mixing"); return -1; } if (!F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " - "not specified"); + f2fs_err(sbi, "journaled quota format not specified"); return -1; } } if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_msg(sbi->sb, KERN_INFO, - "QUOTA feature is enabled, so ignore jquota_fmt"); + f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); F2FS_OPTION(sbi).s_jquota_fmt = 0; } return 0; @@ -425,8 +417,7 @@ static int parse_options(struct super_block *sb, char *options) break; case Opt_nodiscard: if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sb, KERN_WARNING, - "discard is required for zoned block devices"); + f2fs_warn(sbi, "discard is required for zoned block devices"); return -EINVAL; } clear_opt(sbi, DISCARD); @@ -458,20 +449,16 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_user_xattr: - f2fs_msg(sb, KERN_INFO, - "user_xattr options not supported"); + f2fs_info(sbi, "user_xattr options not supported"); break; case Opt_nouser_xattr: - f2fs_msg(sb, KERN_INFO, - "nouser_xattr options not supported"); + f2fs_info(sbi, "nouser_xattr options not supported"); break; case Opt_inline_xattr: - f2fs_msg(sb, KERN_INFO, - "inline_xattr options not supported"); + f2fs_info(sbi, "inline_xattr options not supported"); break; case Opt_noinline_xattr: - f2fs_msg(sb, KERN_INFO, - "noinline_xattr options not supported"); + f2fs_info(sbi, "noinline_xattr options not supported"); break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL @@ -483,10 +470,10 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_acl: - f2fs_msg(sb, KERN_INFO, "acl options not supported"); + f2fs_info(sbi, "acl options not supported"); break; case Opt_noacl: - f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + f2fs_info(sbi, "noacl options not supported"); break; #endif case Opt_active_logs: @@ -536,9 +523,8 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_msg(sb, KERN_INFO, - "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); } else { F2FS_OPTION(sbi).root_reserved_blocks = arg; set_opt(sbi, RESERVE_ROOT); @@ -549,8 +535,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; uid = make_kuid(current_user_ns(), arg); if (!uid_valid(uid)) { - f2fs_msg(sb, KERN_ERR, - "Invalid uid value %d", arg); + f2fs_err(sbi, "Invalid uid value %d", arg); return -EINVAL; } F2FS_OPTION(sbi).s_resuid = uid; @@ -560,8 +545,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; gid = make_kgid(current_user_ns(), arg); if (!gid_valid(gid)) { - f2fs_msg(sb, KERN_ERR, - "Invalid gid value %d", arg); + f2fs_err(sbi, "Invalid gid value %d", arg); return -EINVAL; } F2FS_OPTION(sbi).s_resgid = gid; @@ -574,9 +558,7 @@ static int parse_options(struct super_block *sb, char *options) if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sb, KERN_WARNING, - "adaptive mode is not allowed with " - "zoned block device feature"); + f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature"); kvfree(name); return -EINVAL; } @@ -594,9 +576,8 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) { - f2fs_msg(sb, KERN_WARNING, - "Not support %d, larger than %d", - 1 << arg, BIO_MAX_PAGES); + f2fs_warn(sbi, "Not support %d, larger than %d", + 1 << arg, BIO_MAX_PAGES); return -EINVAL; } F2FS_OPTION(sbi).write_io_size_bits = arg; @@ -617,13 +598,11 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_fault_injection: - f2fs_msg(sb, KERN_INFO, - "fault_injection options not supported"); + f2fs_info(sbi, "fault_injection options not supported"); break; case Opt_fault_type: - f2fs_msg(sb, KERN_INFO, - "fault_type options not supported"); + f2fs_info(sbi, "fault_type options not supported"); break; #endif case Opt_lazytime: @@ -703,8 +682,7 @@ static int parse_options(struct super_block *sb, char *options) case Opt_jqfmt_vfsv0: case Opt_jqfmt_vfsv1: case Opt_noquota: - f2fs_msg(sb, KERN_INFO, - "quota operations not supported"); + f2fs_info(sbi, "quota operations not supported"); break; #endif case Opt_whint: @@ -766,16 +744,14 @@ static int parse_options(struct super_block *sb, char *options) case Opt_test_dummy_encryption: #ifdef CONFIG_FS_ENCRYPTION if (!f2fs_sb_has_encrypt(sbi)) { - f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); + f2fs_err(sbi, "Encrypt feature is off"); return -EINVAL; } F2FS_OPTION(sbi).test_dummy_encryption = true; - f2fs_msg(sb, KERN_INFO, - "Test dummy encryption mode enabled"); + f2fs_info(sbi, "Test dummy encryption mode enabled"); #else - f2fs_msg(sb, KERN_INFO, - "Test dummy encryption mount option ignored"); + f2fs_info(sbi, "Test dummy encryption mount option ignored"); #endif break; case Opt_checkpoint_disable_cap_perc: @@ -804,9 +780,8 @@ static int parse_options(struct super_block *sb, char *options) clear_opt(sbi, DISABLE_CHECKPOINT); break; default: - f2fs_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" or missing value", - p); + f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", + p); return -EINVAL; } } @@ -815,23 +790,18 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; #else if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_INFO, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); + f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_msg(sb, KERN_ERR, - "Filesystem with project quota feature cannot be " - "mounted RDWR without CONFIG_QUOTA"); + f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } #endif if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { - f2fs_msg(sb, KERN_ERR, - "Should set mode=lfs with %uKB-sized IO", - F2FS_IO_SIZE_KB(sbi)); + f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", + F2FS_IO_SIZE_KB(sbi)); return -EINVAL; } @@ -840,15 +810,11 @@ static int parse_options(struct super_block *sb, char *options) if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_msg(sb, KERN_ERR, - "extra_attr or flexible_inline_xattr " - "feature is off"); + f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } if (!test_opt(sbi, INLINE_XATTR)) { - f2fs_msg(sb, KERN_ERR, - "inline_xattr_size option should be " - "set with inline_xattr option"); + f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } @@ -857,16 +823,14 @@ static int parse_options(struct super_block *sb, char *options) if (F2FS_OPTION(sbi).inline_xattr_size < min_size || F2FS_OPTION(sbi).inline_xattr_size > max_size) { - f2fs_msg(sb, KERN_ERR, - "inline xattr size is out of range: %d ~ %d", - min_size, max_size); + f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d", + min_size, max_size); return -EINVAL; } } if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { - f2fs_msg(sb, KERN_ERR, - "LFS not compatible with checkpoint=disable\n"); + f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n"); return -EINVAL; } @@ -1493,8 +1457,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) block_t unusable; if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_ERR, - "checkpoint=disable on readonly fs"); + f2fs_err(sbi, "checkpoint=disable on readonly fs"); return -EINVAL; } sbi->sb->s_flags |= SB_ACTIVE; @@ -1597,8 +1560,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* recover superblocks we couldn't write due to previous RO mount */ if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); - f2fs_msg(sb, KERN_INFO, - "Try to recover all the superblocks, ret: %d", err); + f2fs_info(sbi, "Try to recover all the superblocks, ret: %d", + err); if (!err) clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); } @@ -1639,15 +1602,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; - f2fs_msg(sbi->sb, KERN_WARNING, - "switch extent_cache option is not allowed"); + f2fs_warn(sbi, "switch extent_cache option is not allowed"); goto restore_opts; } if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; - f2fs_msg(sbi->sb, KERN_WARNING, - "disabling checkpoint not compatible with read-only"); + f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); goto restore_opts; } @@ -1717,8 +1678,7 @@ skip: restore_gc: if (need_restart_gc) { if (f2fs_start_gc_thread(sbi)) - f2fs_msg(sbi->sb, KERN_WARNING, - "background gc thread has stopped"); + f2fs_warn(sbi, "background gc thread has stopped"); } else if (need_stop_gc) { f2fs_stop_gc_thread(sbi); } @@ -1857,8 +1817,7 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) { - f2fs_msg(sbi->sb, KERN_ERR, - "quota sysfile may be corrupted, skip loading it"); + f2fs_err(sbi, "quota sysfile may be corrupted, skip loading it"); return 0; } @@ -1874,8 +1833,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) if (f2fs_sb_has_quota_ino(sbi) && rdonly) { err = f2fs_enable_quotas(sbi->sb); if (err) { - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on quota_ino: %d", err); + f2fs_err(sbi, "Cannot turn on quota_ino: %d", err); return 0; } return 1; @@ -1888,8 +1846,8 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) enabled = 1; continue; } - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on quotas: %d on %d", err, i); + f2fs_err(sbi, "Cannot turn on quotas: %d on %d", + err, i); } } return enabled; @@ -1910,8 +1868,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, qf_inode = f2fs_iget(sb, qf_inum); if (IS_ERR(qf_inode)) { - f2fs_msg(sb, KERN_ERR, - "Bad quota inode %u:%lu", type, qf_inum); + f2fs_err(F2FS_SB(sb), "Bad quota inode %u:%lu", type, qf_inum); return PTR_ERR(qf_inode); } @@ -1924,17 +1881,17 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, static int f2fs_enable_quotas(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); int type, err = 0; unsigned long qf_inum; bool quota_mopt[MAXQUOTAS] = { - test_opt(F2FS_SB(sb), USRQUOTA), - test_opt(F2FS_SB(sb), GRPQUOTA), - test_opt(F2FS_SB(sb), PRJQUOTA), + test_opt(sbi, USRQUOTA), + test_opt(sbi, GRPQUOTA), + test_opt(sbi, PRJQUOTA), }; if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) { - f2fs_msg(sb, KERN_ERR, - "quota file may be corrupted, skip loading it"); + f2fs_err(sbi, "quota file may be corrupted, skip loading it"); return 0; } @@ -1947,10 +1904,8 @@ static int f2fs_enable_quotas(struct super_block *sb) DQUOT_USAGE_ENABLED | (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to enable quota tracking " - "(type=%d, err=%d). Please run " - "fsck to fix.", type, err); + f2fs_err(sbi, "Failed to enable quota tracking (type=%d, err=%d). Please run fsck to fix.", + type, err); for (type--; type >= 0; type--) dquot_quota_off(sb, type); set_sbi_flag(F2FS_SB(sb), @@ -2070,10 +2025,8 @@ void f2fs_quota_off_umount(struct super_block *sb) if (err) { int ret = dquot_quota_off(sb, type); - f2fs_msg(sb, KERN_ERR, - "Fail to turn off disk quota " - "(type: %d, err: %d, ret:%d), Please " - "run fsck to fix it.", type, err, ret); + f2fs_err(F2FS_SB(sb), "Fail to turn off disk quota (type: %d, err: %d, ret:%d), Please run fsck to fix it.", + type, err, ret); set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); } } @@ -2366,55 +2319,49 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, (segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Mismatch start address, segment0(%u) cp_blkaddr(%u)", - segment0_blkaddr, cp_blkaddr); + f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", + segment0_blkaddr, cp_blkaddr); return true; } if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) != sit_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong CP boundary, start(%u) end(%u) blocks(%u)", - cp_blkaddr, sit_blkaddr, - segment_count_ckpt << log_blocks_per_seg); + f2fs_info(sbi, "Wrong CP boundary, start(%u) end(%u) blocks(%u)", + cp_blkaddr, sit_blkaddr, + segment_count_ckpt << log_blocks_per_seg); return true; } if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) != nat_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", - sit_blkaddr, nat_blkaddr, - segment_count_sit << log_blocks_per_seg); + f2fs_info(sbi, "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", + sit_blkaddr, nat_blkaddr, + segment_count_sit << log_blocks_per_seg); return true; } if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) != ssa_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", - nat_blkaddr, ssa_blkaddr, - segment_count_nat << log_blocks_per_seg); + f2fs_info(sbi, "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", + nat_blkaddr, ssa_blkaddr, + segment_count_nat << log_blocks_per_seg); return true; } if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) != main_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", - ssa_blkaddr, main_blkaddr, - segment_count_ssa << log_blocks_per_seg); + f2fs_info(sbi, "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", + ssa_blkaddr, main_blkaddr, + segment_count_ssa << log_blocks_per_seg); return true; } if (main_end_blkaddr > seg_end_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)", - main_blkaddr, - segment0_blkaddr + - (segment_count << log_blocks_per_seg), - segment_count_main << log_blocks_per_seg); + f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)", + main_blkaddr, + segment0_blkaddr + + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); return true; } else if (main_end_blkaddr < seg_end_blkaddr) { int err = 0; @@ -2431,12 +2378,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, err = __f2fs_commit_super(bh, NULL); res = err ? "failed" : "done"; } - f2fs_msg(sb, KERN_INFO, - "Fix alignment : %s, start(%u) end(%u) block(%u)", - res, main_blkaddr, - segment0_blkaddr + - (segment_count << log_blocks_per_seg), - segment_count_main << log_blocks_per_seg); + f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)", + res, main_blkaddr, + segment0_blkaddr + + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); if (err) return true; } @@ -2450,7 +2396,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, block_t total_sections, blocks_per_seg; struct f2fs_super_block *raw_super = (struct f2fs_super_block *) (bh->b_data + F2FS_SUPER_OFFSET); - struct super_block *sb = sbi->sb; unsigned int blocksize; size_t crc_offset = 0; __u32 crc = 0; @@ -2460,48 +2405,42 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, crc_offset = le32_to_cpu(raw_super->checksum_offset); if (crc_offset != offsetof(struct f2fs_super_block, crc)) { - f2fs_msg(sb, KERN_INFO, - "Invalid SB checksum offset: %zu", - crc_offset); + f2fs_info(sbi, "Invalid SB checksum offset: %zu", + crc_offset); return 1; } crc = le32_to_cpu(raw_super->crc); if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { - f2fs_msg(sb, KERN_INFO, - "Invalid SB checksum value: %u", crc); + f2fs_info(sbi, "Invalid SB checksum value: %u", crc); return 1; } } if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { - f2fs_msg(sb, KERN_INFO, - "Magic Mismatch, valid(0x%x) - read(0x%x)", - F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); + f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)", + F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); return 1; } /* Currently, support only 4KB page cache size */ if (F2FS_BLKSIZE != PAGE_SIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid page_cache_size (%lu), supports only 4KB", - PAGE_SIZE); + f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB", + PAGE_SIZE); return 1; } /* Currently, support only 4KB block size */ blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); if (blocksize != F2FS_BLKSIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid blocksize (%u), supports only 4KB", - blocksize); + f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", + blocksize); return 1; } /* check log blocks per segment */ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { - f2fs_msg(sb, KERN_INFO, - "Invalid log blocks per segment (%u)", - le32_to_cpu(raw_super->log_blocks_per_seg)); + f2fs_info(sbi, "Invalid log blocks per segment (%u)", + le32_to_cpu(raw_super->log_blocks_per_seg)); return 1; } @@ -2510,17 +2449,16 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, F2FS_MAX_LOG_SECTOR_SIZE || le32_to_cpu(raw_super->log_sectorsize) < F2FS_MIN_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)", - le32_to_cpu(raw_super->log_sectorsize)); + f2fs_info(sbi, "Invalid log sectorsize (%u)", + le32_to_cpu(raw_super->log_sectorsize)); return 1; } if (le32_to_cpu(raw_super->log_sectors_per_block) + le32_to_cpu(raw_super->log_sectorsize) != F2FS_MAX_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid log sectors per block(%u) log sectorsize(%u)", - le32_to_cpu(raw_super->log_sectors_per_block), - le32_to_cpu(raw_super->log_sectorsize)); + f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)", + le32_to_cpu(raw_super->log_sectors_per_block), + le32_to_cpu(raw_super->log_sectorsize)); return 1; } @@ -2534,59 +2472,51 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (segment_count > F2FS_MAX_SEGMENT || segment_count < F2FS_MIN_SEGMENTS) { - f2fs_msg(sb, KERN_INFO, - "Invalid segment count (%u)", - segment_count); + f2fs_info(sbi, "Invalid segment count (%u)", segment_count); return 1; } if (total_sections > segment_count || total_sections < F2FS_MIN_SEGMENTS || segs_per_sec > segment_count || !segs_per_sec) { - f2fs_msg(sb, KERN_INFO, - "Invalid segment/section count (%u, %u x %u)", - segment_count, total_sections, segs_per_sec); + f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)", + segment_count, total_sections, segs_per_sec); return 1; } if ((segment_count / segs_per_sec) < total_sections) { - f2fs_msg(sb, KERN_INFO, - "Small segment_count (%u < %u * %u)", - segment_count, segs_per_sec, total_sections); + f2fs_info(sbi, "Small segment_count (%u < %u * %u)", + segment_count, segs_per_sec, total_sections); return 1; } if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { - f2fs_msg(sb, KERN_INFO, - "Wrong segment_count / block_count (%u > %llu)", - segment_count, le64_to_cpu(raw_super->block_count)); + f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)", + segment_count, le64_to_cpu(raw_super->block_count)); return 1; } if (secs_per_zone > total_sections || !secs_per_zone) { - f2fs_msg(sb, KERN_INFO, - "Wrong secs_per_zone / total_sections (%u, %u)", - secs_per_zone, total_sections); + f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", + secs_per_zone, total_sections); return 1; } if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION || raw_super->hot_ext_count > F2FS_MAX_EXTENSION || (le32_to_cpu(raw_super->extension_count) + raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) { - f2fs_msg(sb, KERN_INFO, - "Corrupted extension count (%u + %u > %u)", - le32_to_cpu(raw_super->extension_count), - raw_super->hot_ext_count, - F2FS_MAX_EXTENSION); + f2fs_info(sbi, "Corrupted extension count (%u + %u > %u)", + le32_to_cpu(raw_super->extension_count), + raw_super->hot_ext_count, + F2FS_MAX_EXTENSION); return 1; } if (le32_to_cpu(raw_super->cp_payload) > (blocks_per_seg - F2FS_CP_PACKS)) { - f2fs_msg(sb, KERN_INFO, - "Insane cp_payload (%u > %u)", - le32_to_cpu(raw_super->cp_payload), - blocks_per_seg - F2FS_CP_PACKS); + f2fs_info(sbi, "Insane cp_payload (%u > %u)", + le32_to_cpu(raw_super->cp_payload), + blocks_per_seg - F2FS_CP_PACKS); return 1; } @@ -2594,11 +2524,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (le32_to_cpu(raw_super->node_ino) != 1 || le32_to_cpu(raw_super->meta_ino) != 2 || le32_to_cpu(raw_super->root_ino) != 3) { - f2fs_msg(sb, KERN_INFO, - "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", - le32_to_cpu(raw_super->node_ino), - le32_to_cpu(raw_super->meta_ino), - le32_to_cpu(raw_super->root_ino)); + f2fs_info(sbi, "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", + le32_to_cpu(raw_super->node_ino), + le32_to_cpu(raw_super->meta_ino), + le32_to_cpu(raw_super->root_ino)); return 1; } @@ -2642,8 +2571,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta < F2FS_MIN_SEGMENTS || ovp_segments == 0 || reserved_segments == 0)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong layout: check mkfs.f2fs version"); + f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version"); return 1; } @@ -2652,16 +2580,15 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); if (!user_block_count || user_block_count >= segment_count_main << log_blocks_per_seg) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong user_block_count: %u", user_block_count); + f2fs_err(sbi, "Wrong user_block_count: %u", + user_block_count); return 1; } valid_user_blocks = le64_to_cpu(ckpt->valid_block_count); if (valid_user_blocks > user_block_count) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid_user_blocks: %u, user_block_count: %u", - valid_user_blocks, user_block_count); + f2fs_err(sbi, "Wrong valid_user_blocks: %u, user_block_count: %u", + valid_user_blocks, user_block_count); return 1; } @@ -2669,9 +2596,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) avail_node_count = sbi->total_node_count - sbi->nquota_files - F2FS_RESERVED_NODE_NUM; if (valid_node_count > avail_node_count) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid_node_count: %u, avail_node_count: %u", - valid_node_count, avail_node_count); + f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u", + valid_node_count, avail_node_count); return 1; } @@ -2685,10 +2611,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_node_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Node segment (%u, %u) has the same " - "segno: %u", i, j, - le32_to_cpu(ckpt->cur_node_segno[i])); + f2fs_err(sbi, "Node segment (%u, %u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); return 1; } } @@ -2700,10 +2625,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Data segment (%u, %u) has the same " - "segno: %u", i, j, - le32_to_cpu(ckpt->cur_data_segno[i])); + f2fs_err(sbi, "Data segment (%u, %u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_data_segno[i])); return 1; } } @@ -2712,10 +2636,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Data segment (%u) and Data segment (%u)" - " has the same segno: %u", i, j, - le32_to_cpu(ckpt->cur_node_segno[i])); + f2fs_err(sbi, "Data segment (%u) and Data segment (%u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); return 1; } } @@ -2726,9 +2649,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong bitmap size: sit: %u, nat:%u", - sit_bitmap_size, nat_bitmap_size); + f2fs_err(sbi, "Wrong bitmap size: sit: %u, nat:%u", + sit_bitmap_size, nat_bitmap_size); return 1; } @@ -2737,23 +2659,20 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (cp_pack_start_sum < cp_payload + 1 || cp_pack_start_sum > blocks_per_seg - 1 - NR_CURSEG_TYPE) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong cp_pack_start_sum: %u", - cp_pack_start_sum); + f2fs_err(sbi, "Wrong cp_pack_start_sum: %u", + cp_pack_start_sum); return 1; } if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) && le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) { - f2fs_msg(sbi->sb, KERN_WARNING, - "layout of large_nat_bitmap is deprecated, " - "run fsck to repair, chksum_offset: %u", - le32_to_cpu(ckpt->checksum_offset)); + f2fs_warn(sbi, "layout of large_nat_bitmap is deprecated, run fsck to repair, chksum_offset: %u", + le32_to_cpu(ckpt->checksum_offset)); return 1; } if (unlikely(f2fs_cp_error(sbi))) { - f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); + f2fs_err(sbi, "A bug case: need to run fsck"); return 1; } return 0; @@ -2922,17 +2841,16 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, for (block = 0; block < 2; block++) { bh = sb_bread(sb, block); if (!bh) { - f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", - block + 1); + f2fs_err(sbi, "Unable to read %dth superblock", + block + 1); err = -EIO; continue; } /* sanity checking of raw super */ if (sanity_check_raw_super(sbi, bh)) { - f2fs_msg(sb, KERN_ERR, - "Can't find valid F2FS filesystem in %dth superblock", - block + 1); + f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", + block + 1); err = -EINVAL; brelse(bh); continue; @@ -3062,36 +2980,32 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && !f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Zoned block device feature not enabled\n"); + f2fs_err(sbi, "Zoned block device feature not enabled\n"); return -EINVAL; } if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) { if (init_blkz_info(sbi, i)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Failed to initialize F2FS blkzone information"); + f2fs_err(sbi, "Failed to initialize F2FS blkzone information"); return -EINVAL; } if (max_devices == 1) break; - f2fs_msg(sbi->sb, KERN_INFO, - "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", - i, FDEV(i).path, - FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk, - bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? - "Host-aware" : "Host-managed"); + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk, + bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? + "Host-aware" : "Host-managed"); continue; } #endif - f2fs_msg(sbi->sb, KERN_INFO, - "Mount Device [%2d]: %20s, %8u, %8x - %8x", - i, FDEV(i).path, - FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk); + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk); } - f2fs_msg(sbi->sb, KERN_INFO, - "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); + f2fs_info(sbi, + "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -3137,7 +3051,7 @@ try_onemore: /* Load the checksum driver */ sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { - f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver."); + f2fs_err(sbi, "Cannot load crc32 driver."); err = PTR_ERR(sbi->s_chksum_driver); sbi->s_chksum_driver = NULL; goto free_sbi; @@ -3145,7 +3059,7 @@ try_onemore: /* set a block size */ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { - f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); + f2fs_err(sbi, "unable to set blocksize"); goto free_sbi; } @@ -3169,8 +3083,7 @@ try_onemore: */ #ifndef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sb, KERN_ERR, - "Zoned block device support is not enabled"); + f2fs_err(sbi, "Zoned block device support is not enabled"); err = -EOPNOTSUPP; goto free_sb_buf; } @@ -3278,14 +3191,14 @@ try_onemore: /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); + f2fs_err(sbi, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); goto free_io_dummy; } err = f2fs_get_valid_checkpoint(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); + f2fs_err(sbi, "Failed to get valid F2FS checkpoint"); goto free_meta_inode; } @@ -3302,7 +3215,7 @@ try_onemore: /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to find devices"); + f2fs_err(sbi, "Failed to find devices"); goto free_devices; } @@ -3333,14 +3246,14 @@ try_onemore: /* setup f2fs internal modules */ err = f2fs_build_segment_manager(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS segment manager (%d)", err); + f2fs_err(sbi, "Failed to initialize F2FS segment manager (%d)", + err); goto free_sm; } err = f2fs_build_node_manager(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS node manager (%d)", err); + f2fs_err(sbi, "Failed to initialize F2FS node manager (%d)", + err); goto free_nm; } @@ -3365,7 +3278,7 @@ try_onemore: /* get an inode for node space */ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); if (IS_ERR(sbi->node_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); + f2fs_err(sbi, "Failed to read node inode"); err = PTR_ERR(sbi->node_inode); goto free_stats; } @@ -3373,7 +3286,7 @@ try_onemore: /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { - f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); + f2fs_err(sbi, "Failed to read root inode"); err = PTR_ERR(root); goto free_node_inode; } @@ -3399,8 +3312,7 @@ try_onemore: if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) - f2fs_msg(sb, KERN_ERR, - "Cannot turn on quotas: error %d", err); + f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } #endif /* if there are nt orphan nodes free them */ @@ -3420,13 +3332,10 @@ try_onemore: if (f2fs_hw_is_readonly(sbi)) { if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { err = -EROFS; - f2fs_msg(sb, KERN_ERR, - "Need to recover fsync data, but " - "write access unavailable"); + f2fs_err(sbi, "Need to recover fsync data, but write access unavailable"); goto free_meta; } - f2fs_msg(sbi->sb, KERN_INFO, "write access " - "unavailable, skipping recovery"); + f2fs_info(sbi, "write access unavailable, skipping recovery"); goto reset_checkpoint; } @@ -3441,8 +3350,8 @@ try_onemore: if (err != -ENOMEM) skip_recovery = true; need_fsck = true; - f2fs_msg(sb, KERN_ERR, - "Cannot recover all fsync data errno=%d", err); + f2fs_err(sbi, "Cannot recover all fsync data errno=%d", + err); goto free_meta; } } else { @@ -3450,8 +3359,7 @@ try_onemore: if (!f2fs_readonly(sb) && err > 0) { err = -EINVAL; - f2fs_msg(sb, KERN_ERR, - "Need to recover fsync data"); + f2fs_err(sbi, "Need to recover fsync data"); goto free_meta; } } @@ -3482,17 +3390,16 @@ reset_checkpoint: /* recover broken superblock */ if (recovery) { err = f2fs_commit_super(sbi, true); - f2fs_msg(sb, KERN_INFO, - "Try to recover %dth superblock, ret: %d", - sbi->valid_super_block ? 1 : 2, err); + f2fs_info(sbi, "Try to recover %dth superblock, ret: %d", + sbi->valid_super_block ? 1 : 2, err); } f2fs_join_shrinker(sbi); f2fs_tuning_parameters(sbi); - f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", - cur_cp_version(F2FS_CKPT(sbi))); + f2fs_notice(sbi, "Mounted with checkpoint version = %llx", + cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); From 9c5641fff72bc38d9f0c3440e1cb71e84f7f1399 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Jun 2019 17:59:03 +0800 Subject: [PATCH 20/47] f2fs: print kernel message if filesystem is inconsistent As Pavel reported, once we detect filesystem inconsistency in f2fs_inplace_write_data(), it will be better to print kernel message as we did in other places. Reported-by: Pavel Machek Signed-off-by: Chao Yu Acked-by: Pavel Machek Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4a6133cc9112..ee96e6fb1a50 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3242,6 +3242,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", + __func__, segno); return -EFAULT; } From 98a7a9ff00cdb3ba94023806e030aee062f23fcd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Jun 2019 16:42:08 +0200 Subject: [PATCH 21/47] f2fs: Use DIV_ROUND_UP() instead of open-coding Replace the open-coded divisions with round-up by calls to the DIV_ROUND_UP() helper macro. Signed-off-by: Geert Uytterhoeven Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/file.c | 6 +++--- fs/f2fs/segment.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cc0d5693989c..e41ef2f1a217 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -480,8 +480,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode); #define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ BITS_PER_BYTE + 1)) -#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \ - BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_DENTRY_BITMAP_SIZE(inode) \ + DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE) #define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ NR_INLINE_DENTRY(inode) + \ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1331cae6b5c2..51de1bc6faea 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1211,7 +1211,7 @@ roll_back: static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); pgoff_t start = offset >> PAGE_SHIFT; pgoff_t end = (offset + len) >> PAGE_SHIFT; int ret; @@ -1464,7 +1464,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; - idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); /* avoid gc operation during block exchange */ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2362,7 +2362,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (!fragmented) goto out; - sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); + sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi)); /* * make sure there are enough free section for LFS allocation, this can diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 166ac0f07a4e..2ae6df03b998 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -109,7 +109,7 @@ #define START_SEGNO(segno) \ (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) #define SIT_BLK_CNT(sbi) \ - ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) + DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK) #define f2fs_bitmap_size(nr) \ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) From c21fda0653420a861eb731fc119c69dc9307db43 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Jun 2019 11:36:14 +0800 Subject: [PATCH 22/47] f2fs: use generic EFSBADCRC/EFSCORRUPTED f2fs uses EFAULT as error number to indicate filesystem is corrupted all the time, but generic filesystems use EUCLEAN for such condition, we need to change to follow others. This patch adds two new macros as below to wrap more generic error code macros, and spread them in code. EFSBADCRC EBADMSG /* Bad CRC detected */ EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ Reported-by: Pavel Machek Signed-off-by: Chao Yu Acked-by: Pavel Machek Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 12 +++++++++--- fs/f2fs/data.c | 18 +++++++++--------- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 6 +++--- fs/f2fs/inline.c | 4 ++-- fs/f2fs/inode.c | 4 ++-- fs/f2fs/node.c | 6 +++--- fs/f2fs/recovery.c | 6 +++--- fs/f2fs/segment.c | 10 +++++----- fs/f2fs/segment.h | 4 ++-- fs/f2fs/super.c | 2 +- fs/f2fs/xattr.c | 4 ++-- 14 files changed, 46 insertions(+), 37 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a5ae1ef1a6d0..2d23671d2034 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -890,6 +890,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned int cp_blks = 1 + __cp_payload(sbi); block_t cp_blk_no; int i; + int err; sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks), GFP_KERNEL); @@ -917,6 +918,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) } else if (cp2) { cur_page = cp2; } else { + err = -EFSCORRUPTED; goto fail_no_cp; } @@ -929,8 +931,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) sbi->cur_cp_pack = 2; /* Sanity checking of checkpoint */ - if (f2fs_sanity_check_ckpt(sbi)) + if (f2fs_sanity_check_ckpt(sbi)) { + err = -EFSCORRUPTED; goto free_fail_no_cp; + } if (cp_blks <= 1) goto done; @@ -944,8 +948,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned char *ckpt = (unsigned char *)sbi->ckpt; cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i); - if (IS_ERR(cur_page)) + if (IS_ERR(cur_page)) { + err = PTR_ERR(cur_page); goto free_fail_no_cp; + } sit_bitmap_ptr = page_address(cur_page); memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); f2fs_put_page(cur_page, 1); @@ -960,7 +966,7 @@ free_fail_no_cp: f2fs_put_page(cp2, 1); fail_no_cp: kvfree(sbi->ckpt); - return -EINVAL; + return err; } static void __add_dirty_inode(struct inode *inode, enum inode_type type) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8d5c38a14b3f..6d074491325c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -455,7 +455,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC_ENHANCE))) - return -EFAULT; + return -EFSCORRUPTED; trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -489,7 +489,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) - return -EFAULT; + return -EFSCORRUPTED; trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -789,7 +789,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, dn.data_blkaddr = ei.blk + index - ei.fofs; if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_err; } goto got_it; @@ -809,7 +809,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_err; } got_it: @@ -1155,7 +1155,7 @@ next_block: if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto sync_out; } @@ -1625,7 +1625,7 @@ got_it: if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { - ret = -EFAULT; + ret = -EFSCORRUPTED; goto out; } } else { @@ -1906,7 +1906,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) - return -EFAULT; + return -EFSCORRUPTED; ipu_force = true; fio->need_lock = LOCK_DONE; @@ -1933,7 +1933,7 @@ got_it: if (__is_valid_data_blkaddr(fio->old_blkaddr) && !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto out_writepage; } /* @@ -2606,7 +2606,7 @@ repeat: } else { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto fail; } err = f2fs_submit_page_read(inode, page, blkaddr); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 051370d1fdff..708d7a545177 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -818,7 +818,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.", __func__, le16_to_cpu(de->name_len)); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; goto out; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e41ef2f1a217..da818d104a30 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3709,4 +3709,7 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) return false; } +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + #endif /* _LINUX_F2FS_H */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 51de1bc6faea..c67ff5057472 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1023,7 +1023,7 @@ next_dnode: !f2fs_is_valid_blkaddr(sbi, *blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); - return -EFAULT; + return -EFSCORRUPTED; } if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c65f87f11de0..6691f526fa40 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -660,7 +660,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) dn.data_blkaddr = ei.blk + index - ei.fofs; if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_page; } goto got_it; @@ -678,7 +678,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) } if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE))) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_page; } got_it: @@ -1454,7 +1454,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { f2fs_err(sbi, "Should run fsck to repair first."); - return -EINVAL; + return -EFSCORRUPTED; } if (test_opt(sbi, DISABLE_CHECKPOINT)) { diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 2e4913a34e14..e843890b1a0f 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -142,7 +142,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_sbi_flag(fio.sbi, SBI_NEED_FSCK); f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", __func__, dn->inode->i_ino, dn->data_blkaddr); - return -EINVAL; + return -EFSCORRUPTED; } f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); @@ -383,7 +383,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", __func__, dir->i_ino, dn.data_blkaddr); - err = -EINVAL; + err = -EFSCORRUPTED; goto out; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9ea3aedb8213..a33d7a849b2d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -74,7 +74,7 @@ static int __written_first_block(struct f2fs_sb_info *sbi, if (!__is_valid_data_blkaddr(addr)) return 1; if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) - return -EFAULT; + return -EFSCORRUPTED; return 0; } @@ -358,7 +358,7 @@ static int do_read_inode(struct inode *inode) if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); - return -EINVAL; + return -EFSCORRUPTED; } /* check data exist */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4a935a26bf82..3a46bcd75365 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -36,7 +36,7 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", __func__, nid); - return -EINVAL; + return -EFSCORRUPTED; } return 0; } @@ -1286,7 +1286,7 @@ static int read_node_page(struct page *page, int op_flags) if (PageUptodate(page)) { if (!f2fs_inode_chksum_verify(sbi, page)) { ClearPageUptodate(page); - return -EBADMSG; + return -EFSBADCRC; } return LOCKED_PAGE; } @@ -1372,7 +1372,7 @@ repeat: } if (!f2fs_inode_chksum_verify(sbi, page)) { - err = -EBADMSG; + err = -EFSBADCRC; goto out_err; } page_hit: diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 72c2c4ba795f..783773e4560d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -553,7 +553,7 @@ retry_dn: f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", inode->i_ino, ofs_of_node(dn.node_page), ofs_of_node(page)); - err = -EFAULT; + err = -EFSCORRUPTED; goto err; } @@ -565,13 +565,13 @@ retry_dn: if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto err; } if (__is_valid_data_blkaddr(dest) && !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ee96e6fb1a50..478284db3065 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2819,7 +2819,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); - return -EIO; + return -EFSCORRUPTED; } /* start/end segment number in main_area */ @@ -3244,7 +3244,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", __func__, segno); - return -EFAULT; + return -EFSCORRUPTED; } stat_inc_inplace_blocks(fio->sbi); @@ -4153,7 +4153,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) f2fs_err(sbi, "Wrong journal entry on segno %u", start); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; break; } @@ -4193,7 +4193,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) f2fs_err(sbi, "SIT is corrupted node# %u vs %u", total_node_blocks, valid_node_count(sbi)); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; } return err; @@ -4311,7 +4311,7 @@ out: "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u", i, curseg->segno, curseg->alloc_type, curseg->next_blkoff, blkofs); - return -EINVAL; + return -EFSCORRUPTED; } } return 0; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2ae6df03b998..b74602813a05 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -696,7 +696,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Mismatch valid blocks %d vs. %d", GET_SIT_VBLOCKS(raw_sit), valid_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EINVAL; + return -EFSCORRUPTED; } /* check segment usage, and check boundary of a given segment number */ @@ -705,7 +705,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Wrong valid blocks %d or segno %u", GET_SIT_VBLOCKS(raw_sit), segno); set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EINVAL; + return -EFSCORRUPTED; } return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b5255f293b25..a5df58021157 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2851,7 +2851,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, if (sanity_check_raw_super(sbi, bh)) { f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - err = -EINVAL; + err = -EFSCORRUPTED; brelse(bh); continue; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index e791741d193b..963242018663 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -346,7 +346,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name); if (!*xe) { - err = -EFAULT; + err = -EFSCORRUPTED; goto out; } check: @@ -622,7 +622,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, /* find entry with wanted name. */ here = __find_xattr(base_addr, last_base_addr, index, len, name); if (!here) { - error = -EFAULT; + error = -EFSCORRUPTED; goto exit; } From 33e0144f7f293746d289ec43b6647320cc9bb1ff Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Jun 2019 11:36:15 +0800 Subject: [PATCH 23/47] f2fs: set SBI_NEED_FSCK for xattr corruption case If xattr is corrupted, let's print kernel message and set SBI_NEED_FSCK for further repair. Reported-by: Pavel Machek Signed-off-by: Chao Yu Acked-by: Pavel Machek Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 963242018663..b32c45621679 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -346,6 +346,9 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name); if (!*xe) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); err = -EFSCORRUPTED; goto out; } @@ -622,6 +625,9 @@ static int __f2fs_setxattr(struct inode *inode, int index, /* find entry with wanted name. */ here = __find_xattr(base_addr, last_base_addr, index, len, name); if (!here) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); error = -EFSCORRUPTED; goto exit; } From 83fe20b2cb79175b1e6ac1795e48eeaa2a688f61 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 May 2019 10:58:45 -0700 Subject: [PATCH 24/47] f2fs: add a rw_sem to cover quota flag changes Two paths to update quota and f2fs_lock_op: 1. - lock_op | - quota_update `- unlock_op 2. - quota_update - lock_op `- unlock_op But, we need to make a transaction on quota_update + lock_op in #2 case. So, this patch introduces: 1. lock_op 2. down_write 3. check __need_flush 4. up_write 5. if there is dirty quota entries, flush them 6. otherwise, good to go Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 41 +++++++++++++++++++---------------------- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 38 +++++++++++++++++++++++++++++++++----- 3 files changed, 53 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2d23671d2034..a0eef95b9e0e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1133,17 +1133,24 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) static bool __need_flush_quota(struct f2fs_sb_info *sbi) { + bool ret = false; + if (!is_journalled_quota(sbi)) return false; - if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) - return false; - if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) - return false; - if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) - return true; - if (get_pages(sbi, F2FS_DIRTY_QDATA)) - return true; - return false; + + down_write(&sbi->quota_sem); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) { + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + ret = true; + } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) { + ret = true; + } + up_write(&sbi->quota_sem); + return ret; } /* @@ -1162,26 +1169,22 @@ static int block_operations(struct f2fs_sb_info *sbi) blk_start_plug(&plug); retry_flush_quotas: + f2fs_lock_all(sbi); if (__need_flush_quota(sbi)) { int locked; if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); - f2fs_lock_all(sbi); + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); goto retry_flush_dents; } - clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + f2fs_unlock_all(sbi); /* only failed during mount/umount/freeze/quotactl */ locked = down_read_trylock(&sbi->sb->s_umount); f2fs_quota_sync(sbi->sb, -1); if (locked) up_read(&sbi->sb->s_umount); - } - - f2fs_lock_all(sbi); - if (__need_flush_quota(sbi)) { - f2fs_unlock_all(sbi); cond_resched(); goto retry_flush_quotas; } @@ -1203,12 +1206,6 @@ retry_flush_dents: */ down_write(&sbi->node_change); - if (__need_flush_quota(sbi)) { - up_write(&sbi->node_change); - f2fs_unlock_all(sbi); - goto retry_flush_quotas; - } - if (get_pages(sbi, F2FS_DIRTY_IMETA)) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index da818d104a30..ef5ab5ed9940 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1256,6 +1256,7 @@ struct f2fs_sb_info { block_t unusable_block_count; /* # of blocks saved by last cp */ unsigned int nquota_files; /* # of quota sysfile */ + struct rw_semaphore quota_sem; /* blocking cp for flags */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a5df58021157..486003b5dfab 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1924,6 +1924,18 @@ int f2fs_quota_sync(struct super_block *sb, int type) int cnt; int ret; + /* + * do_quotactl + * f2fs_quota_sync + * down_read(quota_sem) + * dquot_writeback_dquots() + * f2fs_dquot_commit + * block_operation + * down_read(quota_sem) + */ + f2fs_lock_op(sbi); + + down_read(&sbi->quota_sem); ret = dquot_writeback_dquots(sb, type); if (ret) goto out; @@ -1961,6 +1973,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) out: if (ret) set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); + f2fs_unlock_op(sbi); return ret; } @@ -2052,32 +2066,40 @@ static void f2fs_truncate_quota_inode_pages(struct super_block *sb) static int f2fs_dquot_commit(struct dquot *dquot) { + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_commit(dquot); if (ret < 0) - set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_acquire(struct dquot *dquot) { + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_acquire(dquot); if (ret < 0) - set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); - + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_release(struct dquot *dquot) { + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_release(dquot); if (ret < 0) - set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } @@ -2087,22 +2109,27 @@ static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_mark_dquot_dirty(dquot); /* if we are using journalled quota */ if (is_journalled_quota(sbi)) set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_commit_info(struct super_block *sb, int type) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_commit_info(sb, type); if (ret < 0) - set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } @@ -3172,6 +3199,7 @@ try_onemore: } init_rwsem(&sbi->cp_rwsem); + init_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); From 38455d0ad9e59614c9d84f12659f973f61966960 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Thu, 6 Jun 2019 15:08:13 +0530 Subject: [PATCH 25/47] f2fs: fix is_idle() check for discard type The discard thread should issue upto dpolicy->max_requests at once and wait for all those discard requests at once it reaches dpolicy->max_requests. It should then sleep for dpolicy->min_interval timeout before issuing the next batch of discard requests. But in the current code of is_idle(), it checks for dcc_info->queued_discard and aborts issuing the discard batch of max_requests. This dcc_info->queued_discard will be true always once one discard command is issued. It is thus resulting into this type of discard request pattern - - Issue discard request#1 - is_idle() returns false, discard thread waits for request#1 and then sleeps for min_interval 50ms. - Issue discard request#2 - is_idle() returns false, discard thread waits for request#2 and then sleeps for min_interval 50ms. - and so on for all other discard requests, assuming f2fs is idle w.r.t other conditions. With this fix, the pattern will look like this - - Issue discard request#1 - Issue discard request#2 and so on upto max_requests of 8 - Issue discard request#8 - wait for min_interval 50ms. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ef5ab5ed9940..4379d72010d8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2228,7 +2228,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) get_pages(sbi, F2FS_DIO_WRITE)) return false; - if (SM_I(sbi) && SM_I(sbi)->dcc_info && + if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info && atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) return false; From 6d8762bddfedd152b7be3246826cfd7e5bca58d8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 26 Jun 2019 18:23:05 -0700 Subject: [PATCH 26/47] f2fs: allocate blocks for pinned file This patch allows fallocate to allocate physical blocks for pinned file. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 25 +++++++++++++++++++++++++ fs/f2fs/file.c | 7 ++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index bebd1be3ba49..496fa28b2492 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -804,3 +804,28 @@ WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET WRITE_LIFE_NONE " WRITE_LIFE_NONE WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM WRITE_LIFE_LONG " WRITE_LIFE_LONG + +Fallocate(2) Policy +------------------- + +The default policy follows the below posix rule. + +Allocating disk space + The default operation (i.e., mode is zero) of fallocate() allocates + the disk space within the range specified by offset and len. The + file size (as reported by stat(2)) will be changed if offset+len is + greater than the file size. Any subregion within the range specified + by offset and len that did not contain data before the call will be + initialized to zero. This default behavior closely resembles the + behavior of the posix_fallocate(3) library function, and is intended + as a method of optimally implementing that function. + +However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to +fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addressess having +zero or random data, which is useful to the below scenario where: + 1. create(fd) + 2. ioctl(fd, F2FS_IOC_SET_PIN_FILE) + 3. fallocate(fd, 0, 0, size) + 4. address = fibmap(fd, offset) + 5. open(blkdev) + 6. write(blkdev, address) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c67ff5057472..b164f48e0f31 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1528,7 +1528,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (f2fs_is_pinned_file(inode)) + map.m_seg_type = CURSEG_COLD_DATA; + + err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ? + F2FS_GET_BLOCK_PRE_DIO : + F2FS_GET_BLOCK_PRE_AIO)); if (err) { pgoff_t last_off; From 51e4c254c1b8142bbd82e6e7b678c47bff127874 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 Jul 2019 19:15:29 -0700 Subject: [PATCH 27/47] f2fs: support swap file w/ DIO Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 142 ++++++++++++++++++++++++++++++++++-- fs/f2fs/f2fs.h | 5 +- include/trace/events/f2fs.h | 11 +-- 3 files changed, 143 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6d074491325c..4463b3f3e310 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -54,7 +55,7 @@ static bool __is_cp_guaranteed(struct page *page) static enum count_type __read_io_type(struct page *page) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_file_mapping(page); if (mapping) { struct inode *inode = mapping->host; @@ -1585,7 +1586,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, sector_t block_nr; int ret = 0; - block_in_file = (sector_t)page->index; + block_in_file = (sector_t)page_index(page); last_block = block_in_file + nr_pages; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; @@ -1618,7 +1619,8 @@ got_it: block_nr = map->m_pblk + block_in_file - map->m_lblk; SetPageMappedToDisk(page); - if (!PageUptodate(page) && !cleancache_get_page(page)) { + if (!PageUptodate(page) && (!PageSwapCache(page) && + !cleancache_get_page(page))) { SetPageUptodate(page); goto confused; } @@ -1716,7 +1718,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, - page->index, + page_index(page), readahead_gfp_mask(mapping))) goto next_page; } @@ -1740,7 +1742,7 @@ next_page: static int f2fs_read_data_page(struct file *file, struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); @@ -1749,7 +1751,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false); + ret = f2fs_mpage_readpages(page_file_mapping(page), + NULL, page, 1, false); return ret; } @@ -2851,13 +2854,14 @@ int f2fs_release_page(struct page *page, gfp_t wait) static int f2fs_set_data_page_dirty(struct page *page) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; + struct inode *inode = page_file_mapping(page)->host; trace_f2fs_set_page_dirty(page, DATA); if (!PageUptodate(page)) SetPageUptodate(page); + if (PageSwapCache(page)) + return __set_page_dirty_nobuffers(page); if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { @@ -2949,6 +2953,126 @@ int f2fs_migrate_page(struct address_space *mapping, } #endif +#ifdef CONFIG_SWAP +/* Copied from generic_swapfile_activate() to check any holes */ +static int check_swap_activate(struct file *swap_file, unsigned int max) +{ + struct address_space *mapping = swap_file->f_mapping; + struct inode *inode = mapping->host; + unsigned blocks_per_page; + unsigned long page_no; + unsigned blkbits; + sector_t probe_block; + sector_t last_block; + sector_t lowest_block = -1; + sector_t highest_block = 0; + + blkbits = inode->i_blkbits; + blocks_per_page = PAGE_SIZE >> blkbits; + + /* + * Map all the blocks into the extent list. This code doesn't try + * to be very smart. + */ + probe_block = 0; + page_no = 0; + last_block = i_size_read(inode) >> blkbits; + while ((probe_block + blocks_per_page) <= last_block && page_no < max) { + unsigned block_in_page; + sector_t first_block; + + cond_resched(); + + first_block = bmap(inode, probe_block); + if (first_block == 0) + goto bad_bmap; + + /* + * It must be PAGE_SIZE aligned on-disk + */ + if (first_block & (blocks_per_page - 1)) { + probe_block++; + goto reprobe; + } + + for (block_in_page = 1; block_in_page < blocks_per_page; + block_in_page++) { + sector_t block; + + block = bmap(inode, probe_block + block_in_page); + if (block == 0) + goto bad_bmap; + if (block != first_block + block_in_page) { + /* Discontiguity */ + probe_block++; + goto reprobe; + } + } + + first_block >>= (PAGE_SHIFT - blkbits); + if (page_no) { /* exclude the header page */ + if (first_block < lowest_block) + lowest_block = first_block; + if (first_block > highest_block) + highest_block = first_block; + } + + page_no++; + probe_block += blocks_per_page; +reprobe: + continue; + } + return 0; + +bad_bmap: + pr_err("swapon: swapfile has holes\n"); + return -EINVAL; +} + +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + struct inode *inode = file_inode(file); + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return -EROFS; + + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + + ret = check_swap_activate(file, sis->max); + if (ret) + return ret; + + set_inode_flag(inode, FI_PIN_FILE); + f2fs_precache_extents(inode); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return 0; +} + +static void f2fs_swap_deactivate(struct file *file) +{ + struct inode *inode = file_inode(file); + + clear_inode_flag(inode, FI_PIN_FILE); +} +#else +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + return -EOPNOTSUPP; +} + +static void f2fs_swap_deactivate(struct file *file) +{ +} +#endif + const struct address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, .readpages = f2fs_read_data_pages, @@ -2961,6 +3085,8 @@ const struct address_space_operations f2fs_dblock_aops = { .releasepage = f2fs_release_page, .direct_IO = f2fs_direct_IO, .bmap = f2fs_bmap, + .swap_activate = f2fs_swap_activate, + .swap_deactivate = f2fs_swap_deactivate, #ifdef CONFIG_MIGRATION .migratepage = f2fs_migrate_page, #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4379d72010d8..c2826fdf1d6c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1499,7 +1499,7 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) { - return F2FS_M_SB(page->mapping); + return F2FS_M_SB(page_file_mapping(page)); } static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) @@ -3684,7 +3684,8 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (test_opt(sbi, LFS) && (rw == WRITE) && block_unaligned_IO(inode, iocb, iter)) return true; - if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) && + !(inode->i_flags & S_SWAPFILE)) return true; return false; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index bd334ecfc3c2..bcd512b305d3 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1029,8 +1029,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, ), TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; + __entry->dev = page_file_mapping(page)->host->i_sb->s_dev; + __entry->ino = page_file_mapping(page)->host->i_ino; __entry->index = page->index; __entry->old_blkaddr = fio->old_blkaddr; __entry->new_blkaddr = fio->new_blkaddr; @@ -1217,10 +1217,11 @@ DECLARE_EVENT_CLASS(f2fs__page, ), TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; + __entry->dev = page_file_mapping(page)->host->i_sb->s_dev; + __entry->ino = page_file_mapping(page)->host->i_ino; __entry->type = type; - __entry->dir = S_ISDIR(page->mapping->host->i_mode); + __entry->dir = + S_ISDIR(page_file_mapping(page)->host->i_mode); __entry->index = page->index; __entry->dirty = PageDirty(page); __entry->uptodate = PageUptodate(page); From 1bde218222be28fa01195dbf276b80f560bc5bd4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 4 Jul 2019 23:03:08 -0700 Subject: [PATCH 28/47] f2fs: allow all the users to pin a file This patch allows users to pin files. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b164f48e0f31..f8d46df8fa9e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2964,9 +2964,6 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) __u32 pin; int ret = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (get_user(pin, (__u32 __user *)arg)) return -EFAULT; From 79e79a1966be027da94ca8259cd6d25a6e87aee2 Mon Sep 17 00:00:00 2001 From: Heng Xiao Date: Wed, 3 Jul 2019 10:29:57 +0800 Subject: [PATCH 29/47] f2fs: fix to avoid long latency during umount In umount, we give an constand time to handle pending discard, previously, in __issue_discard_cmd() we missed to check timeout condition in loop, result in delaying long time, fix it. Signed-off-by: Heng Xiao [Chao Yu: add commit message] Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 478284db3065..2d1d7baaf1b6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1497,6 +1497,10 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); + if (dpolicy->timeout != 0 && + f2fs_time_over(sbi, dpolicy->timeout)) + break; + if (dpolicy->io_aware && i < dpolicy->io_aware_gran && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; From b319554831b21884b251330345a5e7e303c604bf Mon Sep 17 00:00:00 2001 From: Ocean Chen Date: Mon, 8 Jul 2019 12:34:56 +0800 Subject: [PATCH 30/47] f2fs: avoid out-of-range memory access blkoff_off might over 512 due to fs corrupt or security vulnerability. That should be checked before being using. Use ENTRIES_IN_SUM to protect invalid value in cur_data_blkoff. Signed-off-by: Ocean Chen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2d1d7baaf1b6..a661ac32e829 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3447,6 +3447,11 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) seg_i = CURSEG_I(sbi, i); segno = le32_to_cpu(ckpt->cur_data_segno[i]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); + if (blk_off > ENTRIES_IN_SUM) { + f2fs_bug_on(sbi, 1); + f2fs_put_page(page, 1); + return -EFAULT; + } seg_i->next_segno = segno; reset_curseg(sbi, i, 0); seg_i->alloc_type = ckpt->alloc_type[i]; From 363a72e3ac9932be1bee7836074e0583016ac129 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 11 Jul 2019 09:29:15 +0800 Subject: [PATCH 31/47] f2fs: improve print log in f2fs_sanity_check_ckpt() As Park Ju Hyung suggested: "I'd like to suggest to write down an actual version of f2fs-tools here as we've seen older versions of fsck doing even more damage and the users might not have the latest f2fs-tools installed." This patch give a more detailed info of how we fix such corruption to user to avoid damageable repair with low version fsck. Signed-off-by: Park Ju Hyung Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 486003b5dfab..f236eef18a4f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2693,7 +2693,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) && le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) { - f2fs_warn(sbi, "layout of large_nat_bitmap is deprecated, run fsck to repair, chksum_offset: %u", + f2fs_warn(sbi, "using deprecated layout of large_nat_bitmap, " + "please run fsck v1.13.0 or higher to repair, chksum_offset: %u, " + "fixed with patch: \"f2fs-tools: relocate chksum_offset for large_nat_bitmap feature\"", le32_to_cpu(ckpt->checksum_offset)); return 1; } From 95e2ce5c62703e255f5472537ce5c92f27fefda1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Mar 2019 10:23:33 -0700 Subject: [PATCH 32/47] fscrypt: drop inode argument from fscrypt_get_ctx() The only reason the inode is being passed to fscrypt_get_ctx() is to verify that the encryption key is available. However, all callers already ensure this because if we get as far as trying to do I/O to an encrypted file without the key, there's already a bug. Therefore, remove this unnecessary argument. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/bio.c | 2 +- fs/crypto/crypto.c | 16 +++++----------- fs/ext4/readpage.c | 2 +- include/linux/fscrypt.h | 5 ++--- 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 0959044c5cee..2a3a877605d1 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -103,7 +103,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); - ctx = fscrypt_get_ctx(inode, GFP_NOFS); + ctx = fscrypt_get_ctx(GFP_NOFS); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 4dc788e3bc96..5efc494a4e38 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -87,23 +87,17 @@ EXPORT_SYMBOL(fscrypt_release_ctx); /** * fscrypt_get_ctx() - Gets an encryption context - * @inode: The inode for which we are doing the crypto * @gfp_flags: The gfp flag for memory allocation * * Allocates and initializes an encryption context. * - * Return: An allocated and initialized encryption context on success; error - * value or NULL otherwise. + * Return: A new encryption context on success; an ERR_PTR() otherwise. */ -struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) +struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) { - struct fscrypt_ctx *ctx = NULL; - struct fscrypt_info *ci = inode->i_crypt_info; + struct fscrypt_ctx *ctx; unsigned long flags; - if (ci == NULL) - return ERR_PTR(-ENOKEY); - /* * We first try getting the ctx from a free list because in * the common case the ctx will have an allocated and @@ -258,9 +252,9 @@ struct page *fscrypt_encrypt_page(const struct inode *inode, BUG_ON(!PageLocked(page)); - ctx = fscrypt_get_ctx(inode, gfp_flags); + ctx = fscrypt_get_ctx(gfp_flags); if (IS_ERR(ctx)) - return (struct page *)ctx; + return ERR_CAST(ctx); /* The encryption operation will require a bounce page. */ ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags); diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index a18d02c9b9ec..41f6a34fc3d1 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -243,7 +243,7 @@ int ext4_mpage_readpages(struct address_space *mapping, struct fscrypt_ctx *ctx = NULL; if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) { - ctx = fscrypt_get_ctx(inode, GFP_NOFS); + ctx = fscrypt_get_ctx(GFP_NOFS); if (IS_ERR(ctx)) goto set_error_page; } diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index e5194fc3983e..6cf8a34523ff 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -90,7 +90,7 @@ static inline bool fscrypt_dummy_context_enabled(struct inode *inode) /* crypto.c */ extern void fscrypt_enqueue_decrypt_work(struct work_struct *); -extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); +extern struct fscrypt_ctx *fscrypt_get_ctx(gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, @@ -247,8 +247,7 @@ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { } -static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, - gfp_t gfp_flags) +static inline struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) { return ERR_PTR(-EOPNOTSUPP); } From 34017243afdb1c6319553946d8b061f309e6b6ea Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 15 Mar 2019 14:16:32 -0700 Subject: [PATCH 33/47] fscrypt: remove WARN_ON_ONCE() when decryption fails If decrypting a block fails, fscrypt did a WARN_ON_ONCE(). But WARN is meant for kernel bugs, which this isn't; this could be hit by fuzzers using fault injection, for example. Also, there is already a proper warning message logged in fscrypt_do_page_crypto(), so the WARN doesn't add much. Just remove the unnessary WARN. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/bio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 2a3a877605d1..0cecd777f0f9 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -36,12 +36,10 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done) int ret = fscrypt_decrypt_page(page->mapping->host, page, PAGE_SIZE, 0, page->index); - if (ret) { - WARN_ON_ONCE(1); + if (ret) SetPageError(page); - } else if (done) { + else if (done) SetPageUptodate(page); - } if (done) unlock_page(page); } From 7a8861d708fd738167e15d07eb15e7515e1a4bdd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Apr 2019 14:32:15 -0700 Subject: [PATCH 34/47] fscrypt: use READ_ONCE() to access ->i_crypt_info ->i_crypt_info starts out NULL and may later be locklessly set to a non-NULL value by the cmpxchg() in fscrypt_get_encryption_info(). But ->i_crypt_info is used directly, which technically is incorrect. It's a data race, and it doesn't include the data dependency barrier needed to safely dereference the pointer on at least one architecture. Fix this by using READ_ONCE() instead. Note: we don't need to use smp_load_acquire(), since dereferencing the pointer only requires a data dependency barrier, which is already included in READ_ONCE(). We also don't need READ_ONCE() in places where ->i_crypt_info is unconditionally dereferenced, since it must have already been checked. Also downgrade the cmpxchg() to cmpxchg_release(), since RELEASE semantics are sufficient on the write side. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/crypto.c | 2 +- fs/crypto/fname.c | 4 ++-- fs/crypto/keyinfo.c | 4 ++-- fs/crypto/policy.c | 6 +++--- include/linux/fscrypt.h | 3 ++- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 5efc494a4e38..098807788257 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -328,7 +328,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) spin_lock(&dentry->d_lock); cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY; spin_unlock(&dentry->d_lock); - dir_has_key = (d_inode(dir)->i_crypt_info != NULL); + dir_has_key = fscrypt_has_encryption_key(d_inode(dir)); dput(dir); /* diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 7ff40a73dbec..050384c79f40 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -269,7 +269,7 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, if (iname->len < FS_CRYPTO_BLOCK_SIZE) return -EUCLEAN; - if (inode->i_crypt_info) + if (fscrypt_has_encryption_key(inode)) return fname_decrypt(inode, iname, oname); if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) { @@ -336,7 +336,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, if (ret) return ret; - if (dir->i_crypt_info) { + if (fscrypt_has_encryption_key(dir)) { if (!fscrypt_fname_encrypted_size(dir, iname->len, dir->i_sb->s_cop->max_namelen, &fname->crypto_buf.len)) diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 1e11a683f63d..855935853e0c 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -509,7 +509,7 @@ int fscrypt_get_encryption_info(struct inode *inode) u8 *raw_key = NULL; int res; - if (inode->i_crypt_info) + if (fscrypt_has_encryption_key(inode)) return 0; res = fscrypt_initialize(inode->i_sb->s_cop->flags); @@ -573,7 +573,7 @@ int fscrypt_get_encryption_info(struct inode *inode) if (res) goto out; - if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL) + if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL) crypt_info = NULL; out: if (res == -ENOKEY) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index bd7eaf9b3f00..d536889ac31b 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -194,8 +194,8 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) res = fscrypt_get_encryption_info(child); if (res) return 0; - parent_ci = parent->i_crypt_info; - child_ci = child->i_crypt_info; + parent_ci = READ_ONCE(parent->i_crypt_info); + child_ci = READ_ONCE(child->i_crypt_info); if (parent_ci && child_ci) { return memcmp(parent_ci->ci_master_key_descriptor, @@ -246,7 +246,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child, if (res < 0) return res; - ci = parent->i_crypt_info; + ci = READ_ONCE(parent->i_crypt_info); if (ci == NULL) return -ENOKEY; diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 6cf8a34523ff..ec8ab7108599 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -79,7 +79,8 @@ struct fscrypt_ctx { static inline bool fscrypt_has_encryption_key(const struct inode *inode) { - return (inode->i_crypt_info != NULL); + /* pairs with cmpxchg_release() in fscrypt_get_encryption_info() */ + return READ_ONCE(inode->i_crypt_info) != NULL; } static inline bool fscrypt_dummy_context_enabled(struct inode *inode) From 37b434e95051193d096116b8e78917313ebdd7ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 20 Mar 2019 11:39:09 -0700 Subject: [PATCH 35/47] fscrypt: clean up and improve dentry revalidation Make various improvements to fscrypt dentry revalidation: - Don't try to handle the case where the per-directory key is removed, as this can't happen without the inode (and dentries) being evicted. - Flag ciphertext dentries rather than plaintext dentries, since it's ciphertext dentries that need the special handling. - Avoid doing unnecessary work for non-ciphertext dentries. - When revalidating ciphertext dentries, try to set up the directory's i_crypt_info to make sure the key is really still absent, rather than invalidating all negative dentries as the previous code did. An old comment suggested we can't do this for locking reasons, but AFAICT this comment was outdated and it actually works fine. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/crypto.c | 58 +++++++++++++++++++++-------------------- fs/crypto/hooks.c | 4 +-- include/linux/dcache.h | 2 +- include/linux/fscrypt.h | 6 ++--- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 098807788257..68e2ca4c4af6 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -307,45 +307,47 @@ int fscrypt_decrypt_page(const struct inode *inode, struct page *page, EXPORT_SYMBOL(fscrypt_decrypt_page); /* - * Validate dentries for encrypted directories to make sure we aren't - * potentially caching stale data after a key has been added or - * removed. + * Validate dentries in encrypted directories to make sure we aren't potentially + * caching stale dentries after a key has been added. */ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *dir; - int dir_has_key, cached_with_key; + int err; + int valid; + + /* + * Plaintext names are always valid, since fscrypt doesn't support + * reverting to ciphertext names without evicting the directory's inode + * -- which implies eviction of the dentries in the directory. + */ + if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) + return 1; + + /* + * Ciphertext name; valid if the directory's key is still unavailable. + * + * Although fscrypt forbids rename() on ciphertext names, we still must + * use dget_parent() here rather than use ->d_parent directly. That's + * because a corrupted fs image may contain directory hard links, which + * the VFS handles by moving the directory's dentry tree in the dcache + * each time ->lookup() finds the directory and it already has a dentry + * elsewhere. Thus ->d_parent can be changing, and we must safely grab + * a reference to some ->d_parent to prevent it from being freed. + */ if (flags & LOOKUP_RCU) return -ECHILD; dir = dget_parent(dentry); - if (!IS_ENCRYPTED(d_inode(dir))) { - dput(dir); - return 0; - } - - spin_lock(&dentry->d_lock); - cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY; - spin_unlock(&dentry->d_lock); - dir_has_key = fscrypt_has_encryption_key(d_inode(dir)); + err = fscrypt_get_encryption_info(d_inode(dir)); + valid = !fscrypt_has_encryption_key(d_inode(dir)); dput(dir); - /* - * If the dentry was cached without the key, and it is a - * negative dentry, it might be a valid name. We can't check - * if the key has since been made available due to locking - * reasons, so we fail the validation so ext4_lookup() can do - * this check. - * - * We also fail the validation if the dentry was created with - * the key present, but we no longer have the key, or vice versa. - */ - if ((!cached_with_key && d_is_negative(dentry)) || - (!cached_with_key && dir_has_key) || - (cached_with_key && !dir_has_key)) - return 0; - return 1; + if (err < 0) + return err; + + return valid; } const struct dentry_operations fscrypt_d_ops = { diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 56debb1fcf5e..a9492f75bbe1 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -101,9 +101,9 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) if (err) return err; - if (fscrypt_has_encryption_key(dir)) { + if (!fscrypt_has_encryption_key(dir)) { spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; + dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ef4b70f64f33..4c6b92d5ac26 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -211,7 +211,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ -#define DCACHE_ENCRYPTED_WITH_KEY 0x02000000 /* dir is encrypted with a valid key */ +#define DCACHE_ENCRYPTED_NAME 0x02000000 /* Encrypted name (dir key was unavailable) */ #define DCACHE_OP_REAL 0x04000000 #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index ec8ab7108599..09e368a515d1 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -545,10 +545,8 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * filenames are presented in encrypted form. Therefore, we'll try to set up * the directory's encryption key, but even without it the lookup can continue. * - * To allow invalidating stale dentries if the directory's encryption key is - * added later, we also install a custom ->d_revalidate() method and use the - * DCACHE_ENCRYPTED_WITH_KEY flag to indicate whether a given dentry is a - * plaintext name (flag set) or a ciphertext name (flag cleared). + * This also installs a custom ->d_revalidate() method which will invalidate the + * dentry if it was created without the key and the key is later added. * * Return: 0 on success, -errno if a problem occurred while setting up the * encryption key From 081985b7e2fa93f619364ec4727a47235f05d606 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 20 Mar 2019 11:39:10 -0700 Subject: [PATCH 36/47] fscrypt: fix race allowing rename() and link() of ciphertext dentries Close some race conditions where fscrypt allowed rename() and link() on ciphertext dentries that had been looked up just prior to the key being concurrently added. It's better to return -ENOKEY in this case. This avoids doing the nonsensical thing of encrypting the names a second time when searching for the actual on-disk dir entries. It also guarantees that DCACHE_ENCRYPTED_NAME dentries are never rename()d, so the dcache won't have support all possible combinations of moving DCACHE_ENCRYPTED_NAME around during __d_move(). Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/hooks.c | 12 +++++++++++- include/linux/fscrypt.h | 9 +++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index a9492f75bbe1..2e7498a821a4 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -49,7 +49,8 @@ int fscrypt_file_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL_GPL(fscrypt_file_open); -int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) +int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, + struct dentry *dentry) { int err; @@ -57,6 +58,10 @@ int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) if (err) return err; + /* ... in case we looked up ciphertext name before key was added */ + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) + return -ENOKEY; + if (!fscrypt_has_permitted_context(dir, inode)) return -EXDEV; @@ -78,6 +83,11 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) return err; + /* ... in case we looked up ciphertext name(s) before key was added */ + if ((old_dentry->d_flags | new_dentry->d_flags) & + DCACHE_ENCRYPTED_NAME) + return -ENOKEY; + if (old_dir != new_dir) { if (IS_ENCRYPTED(new_dir) && !fscrypt_has_permitted_context(new_dir, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 09e368a515d1..855f743c226e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -215,7 +215,8 @@ extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, /* hooks.c */ extern int fscrypt_file_open(struct inode *inode, struct file *filp); -extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); +extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, + struct dentry *dentry); extern int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, @@ -401,8 +402,8 @@ static inline int fscrypt_file_open(struct inode *inode, struct file *filp) return 0; } -static inline int __fscrypt_prepare_link(struct inode *inode, - struct inode *dir) +static inline int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, + struct dentry *dentry) { return -EOPNOTSUPP; } @@ -497,7 +498,7 @@ static inline int fscrypt_prepare_link(struct dentry *old_dentry, struct dentry *dentry) { if (IS_ENCRYPTED(dir)) - return __fscrypt_prepare_link(d_inode(old_dentry), dir); + return __fscrypt_prepare_link(d_inode(old_dentry), dir, dentry); return 0; } From 983888f5138e1a791eba3f6bd348d20c091f6b4f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 20 Mar 2019 11:39:12 -0700 Subject: [PATCH 37/47] fscrypt: only set dentry_operations on ciphertext dentries Plaintext dentries are always valid, so only set fscrypt_d_ops on ciphertext dentries. Besides marginally improved performance, this allows overlayfs to use an fscrypt-encrypted upperdir, provided that all the following are true: (1) The fscrypt encryption key is placed in the keyring before mounting overlayfs, and remains while the overlayfs is mounted. (2) The overlayfs workdir uses the same encryption policy. (3) No dentries for the ciphertext names of subdirectories have been created in the upperdir or workdir yet. (Since otherwise d_splice_alias() will reuse the old dentry with ->d_op set.) One potential use case is using an ephemeral encryption key to encrypt all files created or changed by a container, so that they can be securely erased ("crypto-shredded") after the container stops. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/hooks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 2e7498a821a4..9d8910e86ee5 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -115,9 +115,8 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); + d_set_d_op(dentry, &fscrypt_d_ops); } - - d_set_d_op(dentry, &fscrypt_d_ops); return 0; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); From 716c370d9bd9e947e7ab57128d0646d87c8a745c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 20 Mar 2019 11:39:13 -0700 Subject: [PATCH 38/47] fscrypt: fix race where ->lookup() marks plaintext dentry as ciphertext ->lookup() in an encrypted directory begins as follows: 1. fscrypt_prepare_lookup(): a. Try to load the directory's encryption key. b. If the key is unavailable, mark the dentry as a ciphertext name via d_flags. 2. fscrypt_setup_filename(): a. Try to load the directory's encryption key. b. If the key is available, encrypt the name (treated as a plaintext name) to get the on-disk name. Otherwise decode the name (treated as a ciphertext name) to get the on-disk name. But if the key is concurrently added, it may be found at (2a) but not at (1a). In this case, the dentry will be wrongly marked as a ciphertext name even though it was actually treated as plaintext. This will cause the dentry to be wrongly invalidated on the next lookup, potentially causing problems. For example, if the racy ->lookup() was part of sys_mount(), then the new mount will be detached when anything tries to access it. This is despite the mountpoint having a plaintext path, which should remain valid now that the key was added. Of course, this is only possible if there's a userspace race. Still, the additional kernel-side race is confusing and unexpected. Close the kernel-side race by changing fscrypt_prepare_lookup() to also set the on-disk filename (step 2b), consistent with the d_flags update. Fixes: 28b4c263961c ("ext4 crypto: revalidate dentry after adding or removing the key") Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/fname.c | 1 + fs/crypto/hooks.c | 11 +++--- fs/ext4/ext4.h | 62 +++++++++++++++++++++++++-------- fs/ext4/namei.c | 76 ++++++++++++++++++++++++++++------------- fs/f2fs/namei.c | 17 +++++---- fs/ubifs/dir.c | 8 ++--- include/linux/fscrypt.h | 30 ++++++++++------ 7 files changed, 139 insertions(+), 66 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 050384c79f40..eccea3d8f923 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -356,6 +356,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, } if (!lookup) return -ENOKEY; + fname->is_ciphertext_name = true; /* * We don't have the key and we are doing a lookup; decode the diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 9d8910e86ee5..042d5b44f4ed 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -104,20 +104,21 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, } EXPORT_SYMBOL_GPL(__fscrypt_prepare_rename); -int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) +int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct fscrypt_name *fname) { - int err = fscrypt_get_encryption_info(dir); + int err = fscrypt_setup_filename(dir, &dentry->d_name, 1, fname); - if (err) + if (err && err != -ENOENT) return err; - if (!fscrypt_has_encryption_key(dir)) { + if (fname->is_ciphertext_name) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); d_set_d_op(dentry, &fscrypt_d_ops); } - return 0; + return err; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 055056632895..ff9e16c9d270 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2283,23 +2283,47 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); #ifdef CONFIG_FS_ENCRYPTION +static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, + const struct fscrypt_name *src) +{ + memset(dst, 0, sizeof(*dst)); + + dst->usr_fname = src->usr_fname; + dst->disk_name = src->disk_name; + dst->hinfo.hash = src->hash; + dst->hinfo.minor_hash = src->minor_hash; + dst->crypto_buf = src->crypto_buf; +} + static inline int ext4_fname_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct ext4_filename *fname) + const struct qstr *iname, + int lookup, + struct ext4_filename *fname) { struct fscrypt_name name; int err; - memset(fname, 0, sizeof(struct ext4_filename)); - err = fscrypt_setup_filename(dir, iname, lookup, &name); + if (err) + return err; - fname->usr_fname = name.usr_fname; - fname->disk_name = name.disk_name; - fname->hinfo.hash = name.hash; - fname->hinfo.minor_hash = name.minor_hash; - fname->crypto_buf = name.crypto_buf; - return err; + ext4_fname_from_fscrypt_name(fname, &name); + return 0; +} + +static inline int ext4_fname_prepare_lookup(struct inode *dir, + struct dentry *dentry, + struct ext4_filename *fname) +{ + struct fscrypt_name name; + int err; + + err = fscrypt_prepare_lookup(dir, dentry, &name); + if (err) + return err; + + ext4_fname_from_fscrypt_name(fname, &name); + return 0; } static inline void ext4_fname_free_filename(struct ext4_filename *fname) @@ -2313,19 +2337,27 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->usr_fname = NULL; fname->disk_name.name = NULL; } -#else +#else /* !CONFIG_FS_ENCRYPTION */ static inline int ext4_fname_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct ext4_filename *fname) + const struct qstr *iname, + int lookup, + struct ext4_filename *fname) { fname->usr_fname = iname; fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; return 0; } -static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } -#endif +static inline int ext4_fname_prepare_lookup(struct inode *dir, + struct dentry *dentry, + struct ext4_filename *fname) +{ + return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname); +} + +static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } +#endif /* !CONFIG_FS_ENCRYPTION */ /* dir.c */ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5e81b742e2c0..b5760c1f4741 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1326,7 +1326,7 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, } /* - * ext4_find_entry() + * __ext4_find_entry() * * finds an entry in the specified directory with the wanted name. It * returns the cache buffer in which the entry was found, and the entry @@ -1336,39 +1336,32 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, * The returned buffer_head has ->b_count elevated. The caller is expected * to brelse() it when appropriate. */ -static struct buffer_head * ext4_find_entry (struct inode *dir, - const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, - int *inlined) +static struct buffer_head *__ext4_find_entry(struct inode *dir, + struct ext4_filename *fname, + struct ext4_dir_entry_2 **res_dir, + int *inlined) { struct super_block *sb; struct buffer_head *bh_use[NAMEI_RA_SIZE]; struct buffer_head *bh, *ret = NULL; ext4_lblk_t start, block; - const u8 *name = d_name->name; + const u8 *name = fname->usr_fname->name; size_t ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ size_t ra_ptr = 0; /* Current index into readahead buffer */ ext4_lblk_t nblocks; int i, namelen, retval; - struct ext4_filename fname; *res_dir = NULL; sb = dir->i_sb; - namelen = d_name->len; + namelen = fname->usr_fname->len; if (namelen > EXT4_NAME_LEN) return NULL; - retval = ext4_fname_setup_filename(dir, d_name, 1, &fname); - if (retval == -ENOENT) - return NULL; - if (retval) - return ERR_PTR(retval); - if (ext4_has_inline_data(dir)) { int has_inline_data = 1; - ret = ext4_find_inline_entry(dir, &fname, res_dir, + ret = ext4_find_inline_entry(dir, fname, res_dir, &has_inline_data); if (has_inline_data) { if (inlined) @@ -1388,7 +1381,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto restart; } if (is_dx(dir)) { - ret = ext4_dx_find_entry(dir, &fname, res_dir); + ret = ext4_dx_find_entry(dir, fname, res_dir); /* * On success, or if the error was file not found, * return. Otherwise, fall back to doing a search the @@ -1452,7 +1445,7 @@ restart: goto cleanup_and_exit; } set_buffer_verified(bh); - i = search_dirblock(bh, dir, &fname, + i = search_dirblock(bh, dir, fname, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { EXT4_I(dir)->i_dir_start_lookup = block; @@ -1483,10 +1476,50 @@ cleanup_and_exit: /* Clean up the read-ahead blocks */ for (; ra_ptr < ra_max; ra_ptr++) brelse(bh_use[ra_ptr]); - ext4_fname_free_filename(&fname); return ret; } +static struct buffer_head *ext4_find_entry(struct inode *dir, + const struct qstr *d_name, + struct ext4_dir_entry_2 **res_dir, + int *inlined) +{ + int err; + struct ext4_filename fname; + struct buffer_head *bh; + + err = ext4_fname_setup_filename(dir, d_name, 1, &fname); + if (err == -ENOENT) + return NULL; + if (err) + return ERR_PTR(err); + + bh = __ext4_find_entry(dir, &fname, res_dir, inlined); + + ext4_fname_free_filename(&fname); + return bh; +} + +static struct buffer_head *ext4_lookup_entry(struct inode *dir, + struct dentry *dentry, + struct ext4_dir_entry_2 **res_dir) +{ + int err; + struct ext4_filename fname; + struct buffer_head *bh; + + err = ext4_fname_prepare_lookup(dir, dentry, &fname); + if (err == -ENOENT) + return NULL; + if (err) + return ERR_PTR(err); + + bh = __ext4_find_entry(dir, &fname, res_dir, NULL); + + ext4_fname_free_filename(&fname); + return bh; +} + static struct buffer_head * ext4_dx_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir) @@ -1545,16 +1578,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi struct inode *inode; struct ext4_dir_entry_2 *de; struct buffer_head *bh; - int err; - - err = fscrypt_prepare_lookup(dir, dentry, flags); - if (err) - return ERR_PTR(err); if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); + bh = ext4_lookup_entry(dir, dentry, &de); if (IS_ERR(bh)) return (struct dentry *) bh; inode = NULL; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0bdc64994caf..c5b99042e6f2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -435,19 +435,23 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, nid_t ino = -1; int err = 0; unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); + struct fscrypt_name fname; trace_f2fs_lookup_start(dir, dentry, flags); - err = fscrypt_prepare_lookup(dir, dentry, flags); - if (err) - goto out; - if (dentry->d_name.len > F2FS_NAME_LEN) { err = -ENAMETOOLONG; goto out; } - de = f2fs_find_entry(dir, &dentry->d_name, &page); + err = fscrypt_prepare_lookup(dir, dentry, &fname); + if (err == -ENOENT) + goto out_splice; + if (err) + goto out; + de = __f2fs_find_entry(dir, &fname, &page); + fscrypt_free_filename(&fname); + if (!de) { if (IS_ERR(page)) { err = PTR_ERR(page); @@ -486,8 +490,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } out_splice: new = d_splice_alias(inode, dentry); - if (IS_ERR(new)) - err = PTR_ERR(new); + err = PTR_ERR_OR_ZERO(new); trace_f2fs_lookup_end(dir, dentry, ino, err); return new; out_iput: diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5767b373a8ff..b73de6d04fa3 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -220,11 +220,9 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); - err = fscrypt_prepare_lookup(dir, dentry, flags); - if (err) - return ERR_PTR(err); - - err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &nm); + err = fscrypt_prepare_lookup(dir, dentry, &nm); + if (err == -ENOENT) + return d_splice_alias(NULL, dentry); if (err) return ERR_PTR(err); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 855f743c226e..9bd341e2bf56 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -33,6 +33,7 @@ struct fscrypt_name { u32 hash; u32 minor_hash; struct fscrypt_str crypto_buf; + bool is_ciphertext_name; }; #define FSTR_INIT(n, l) { .name = n, .len = l } @@ -222,7 +223,8 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); -extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); +extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct fscrypt_name *fname); extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); @@ -331,7 +333,7 @@ static inline int fscrypt_setup_filename(struct inode *dir, if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; - memset(fname, 0, sizeof(struct fscrypt_name)); + memset(fname, 0, sizeof(*fname)); fname->usr_fname = iname; fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; @@ -418,7 +420,8 @@ static inline int __fscrypt_prepare_rename(struct inode *old_dir, } static inline int __fscrypt_prepare_lookup(struct inode *dir, - struct dentry *dentry) + struct dentry *dentry, + struct fscrypt_name *fname) { return -EOPNOTSUPP; } @@ -539,25 +542,32 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * fscrypt_prepare_lookup - prepare to lookup a name in a possibly-encrypted directory * @dir: directory being searched * @dentry: filename being looked up - * @flags: lookup flags + * @fname: (output) the name to use to search the on-disk directory * - * Prepare for ->lookup() in a directory which may be encrypted. Lookups can be - * done with or without the directory's encryption key; without the key, + * Prepare for ->lookup() in a directory which may be encrypted by determining + * the name that will actually be used to search the directory on-disk. Lookups + * can be done with or without the directory's encryption key; without the key, * filenames are presented in encrypted form. Therefore, we'll try to set up * the directory's encryption key, but even without it the lookup can continue. * * This also installs a custom ->d_revalidate() method which will invalidate the * dentry if it was created without the key and the key is later added. * - * Return: 0 on success, -errno if a problem occurred while setting up the - * encryption key + * Return: 0 on success; -ENOENT if key is unavailable but the filename isn't a + * correctly formed encoded ciphertext name, so a negative dentry should be + * created; or another -errno code. */ static inline int fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) + struct fscrypt_name *fname) { if (IS_ENCRYPTED(dir)) - return __fscrypt_prepare_lookup(dir, dentry); + return __fscrypt_prepare_lookup(dir, dentry, fname); + + memset(fname, 0, sizeof(*fname)); + fname->usr_fname = &dentry->d_name; + fname->disk_name.name = (unsigned char *)dentry->d_name.name; + fname->disk_name.len = dentry->d_name.len; return 0; } From 1f1be4a771ebb7aa4949fbcb693463da6edcf1ec Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 10 Apr 2019 13:21:15 -0700 Subject: [PATCH 39/47] fscrypt: cache decrypted symlink target in ->i_link Path lookups that traverse encrypted symlink(s) are very slow because each encrypted symlink needs to be decrypted each time it's followed. This also involves dropping out of rcu-walk mode. Make encrypted symlinks faster by caching the decrypted symlink target in ->i_link. The first call to fscrypt_get_symlink() sets it. Then, the existing VFS path lookup code uses the non-NULL ->i_link to take the fast path where ->get_link() isn't called, and lookups in rcu-walk mode remain in rcu-walk mode. Also set ->i_link immediately when a new encrypted symlink is created. To safely free the symlink target after an RCU grace period has elapsed, introduce a new function fscrypt_free_inode(), and make the relevant filesystems call it just before actually freeing the inode. Cc: Al Viro Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/hooks.c | 40 +++++++++++++++++++++++++++++++++------- fs/crypto/keyinfo.c | 21 +++++++++++++++++++++ fs/ext4/super.c | 3 +++ fs/f2fs/super.c | 3 +++ include/linux/fscrypt.h | 5 +++++ 5 files changed, 65 insertions(+), 7 deletions(-) diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 042d5b44f4ed..2dc22549d724 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -189,11 +189,9 @@ int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, sd->len = cpu_to_le16(ciphertext_len); err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); - if (err) { - if (!disk_link->name) - kfree(sd); - return err; - } + if (err) + goto err_free_sd; + /* * Null-terminating the ciphertext doesn't make sense, but we still * count the null terminator in the length, so we might as well @@ -201,9 +199,20 @@ int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, */ sd->encrypted_path[ciphertext_len] = '\0'; + /* Cache the plaintext symlink target for later use by get_link() */ + err = -ENOMEM; + inode->i_link = kmemdup(target, len + 1, GFP_NOFS); + if (!inode->i_link) + goto err_free_sd; + if (!disk_link->name) disk_link->name = (unsigned char *)sd; return 0; + +err_free_sd: + if (!disk_link->name) + kfree(sd); + return err; } EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink); @@ -212,7 +221,7 @@ EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink); * @inode: the symlink inode * @caddr: the on-disk contents of the symlink * @max_size: size of @caddr buffer - * @done: if successful, will be set up to free the returned target + * @done: if successful, will be set up to free the returned target if needed * * If the symlink's encryption key is available, we decrypt its target. * Otherwise, we encode its target for presentation. @@ -227,12 +236,18 @@ const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, { const struct fscrypt_symlink_data *sd; struct fscrypt_str cstr, pstr; + bool has_key; int err; /* This is for encrypted symlinks only */ if (WARN_ON(!IS_ENCRYPTED(inode))) return ERR_PTR(-EINVAL); + /* If the decrypted target is already cached, just return it. */ + pstr.name = READ_ONCE(inode->i_link); + if (pstr.name) + return pstr.name; + /* * Try to set up the symlink's encryption key, but we can continue * regardless of whether the key is available or not. @@ -240,6 +255,7 @@ const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, err = fscrypt_get_encryption_info(inode); if (err) return ERR_PTR(err); + has_key = fscrypt_has_encryption_key(inode); /* * For historical reasons, encrypted symlink targets are prefixed with @@ -271,7 +287,17 @@ const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, goto err_kfree; pstr.name[pstr.len] = '\0'; - set_delayed_call(done, kfree_link, pstr.name); + + /* + * Cache decrypted symlink targets in i_link for later use. Don't cache + * symlink targets encoded without the key, since those become outdated + * once the key is added. This pairs with the READ_ONCE() above and in + * the VFS path lookup code. + */ + if (!has_key || + cmpxchg_release(&inode->i_link, NULL, pstr.name) != NULL) + set_delayed_call(done, kfree_link, pstr.name); + return pstr.name; err_kfree: diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 855935853e0c..45dd16f5f920 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -584,9 +584,30 @@ out: } EXPORT_SYMBOL(fscrypt_get_encryption_info); +/** + * fscrypt_put_encryption_info - free most of an inode's fscrypt data + * + * Free the inode's fscrypt_info. Filesystems must call this when the inode is + * being evicted. An RCU grace period need not have elapsed yet. + */ void fscrypt_put_encryption_info(struct inode *inode) { put_crypt_info(inode->i_crypt_info); inode->i_crypt_info = NULL; } EXPORT_SYMBOL(fscrypt_put_encryption_info); + +/** + * fscrypt_free_inode - free an inode's fscrypt data requiring RCU delay + * + * Free the inode's cached decrypted symlink target, if any. Filesystems must + * call this after an RCU grace period, just before they free the inode. + */ +void fscrypt_free_inode(struct inode *inode) +{ + if (IS_ENCRYPTED(inode) && S_ISLNK(inode->i_mode)) { + kfree(inode->i_link); + inode->i_link = NULL; + } +} +EXPORT_SYMBOL(fscrypt_free_inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6664d5948e93..e1d778c2c983 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1065,6 +1065,9 @@ static int ext4_drop_inode(struct inode *inode) static void ext4_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); + + fscrypt_free_inode(inode); + kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f236eef18a4f..bda96629525d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -982,6 +982,9 @@ static void f2fs_dirty_inode(struct inode *inode, int flags) static void f2fs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); + + fscrypt_free_inode(inode); + kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode)); } diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 9bd341e2bf56..aeb87feab683 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -116,6 +116,7 @@ extern int fscrypt_inherit_context(struct inode *, struct inode *, /* keyinfo.c */ extern int fscrypt_get_encryption_info(struct inode *); extern void fscrypt_put_encryption_info(struct inode *); +extern void fscrypt_free_inode(struct inode *); /* fname.c */ extern int fscrypt_setup_filename(struct inode *, const struct qstr *, @@ -325,6 +326,10 @@ static inline void fscrypt_put_encryption_info(struct inode *inode) return; } +static inline void fscrypt_free_inode(struct inode *inode) +{ +} + /* fname.c */ static inline int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, From dd038b348f51f93550644cfd5183ffe87c99ed57 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 20 Mar 2019 11:39:11 -0700 Subject: [PATCH 40/47] fs, fscrypt: clear DCACHE_ENCRYPTED_NAME when unaliasing directory Make __d_move() clear DCACHE_ENCRYPTED_NAME on the source dentry. This is needed for when d_splice_alias() moves a directory's encrypted alias to its decrypted alias as a result of the encryption key being added. Otherwise, the decrypted alias will incorrectly be invalidated on the next lookup, causing problems such as unmounting a mount the user just mount()ed there. Note that we don't have to support arbitrary moves of this flag because fscrypt doesn't allow dentries with DCACHE_ENCRYPTED_NAME to be the source or target of a rename(). Fixes: 28b4c263961c ("ext4 crypto: revalidate dentry after adding or removing the key") Reported-by: Sarthak Kukreti Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/dcache.c | 2 ++ include/linux/fscrypt.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index 2e7e8d85e9b4..4298bb29ff11 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -2787,6 +2788,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); __d_rehash(dentry); fsnotify_update_flags(dentry); + fscrypt_handle_d_move(dentry); write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index aeb87feab683..28c74e0a7231 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -90,6 +90,18 @@ static inline bool fscrypt_dummy_context_enabled(struct inode *inode) inode->i_sb->s_cop->dummy_context(inode); } +/* + * When d_splice_alias() moves a directory's encrypted alias to its decrypted + * alias as a result of the encryption key being added, DCACHE_ENCRYPTED_NAME + * must be cleared. Note that we don't have to support arbitrary moves of this + * flag because fscrypt doesn't allow encrypted aliases to be the source or + * target of a rename(). + */ +static inline void fscrypt_handle_d_move(struct dentry *dentry) +{ + dentry->d_flags &= ~DCACHE_ENCRYPTED_NAME; +} + /* crypto.c */ extern void fscrypt_enqueue_decrypt_work(struct work_struct *); extern struct fscrypt_ctx *fscrypt_get_ctx(gfp_t); @@ -247,6 +259,10 @@ static inline bool fscrypt_dummy_context_enabled(struct inode *inode) return false; } +static inline void fscrypt_handle_d_move(struct dentry *dentry) +{ +} + /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { From 846bf3b84a7b41cda4c0787245f91b00052895e1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 10 Apr 2019 13:21:14 -0700 Subject: [PATCH 41/47] vfs: use READ_ONCE() to access ->i_link Use 'READ_ONCE(inode->i_link)' to explicitly support filesystems caching the symlink target in ->i_link later if it was unavailable at iget() time, or wasn't easily available. I'll be doing this in fscrypt, to improve the performance of encrypted symlinks on ext4, f2fs, and ubifs. ->i_link will start NULL and may later be set to a non-NULL value by a smp_store_release() or cmpxchg_release(). READ_ONCE() is needed on the read side. smp_load_acquire() is unnecessary because only a data dependency barrier is required. (Thanks to Al for pointing this out.) Acked-by: Al Viro Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 0cab6494978c..11f273fcdbdf 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1068,7 +1068,7 @@ const char *get_link(struct nameidata *nd) return ERR_PTR(error); nd->last_type = LAST_BIND; - res = inode->i_link; + res = READ_ONCE(inode->i_link); if (!res) { const char * (*get)(struct dentry *, struct inode *, struct delayed_call *); @@ -4731,7 +4731,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) spin_unlock(&inode->i_lock); } - link = inode->i_link; + link = READ_ONCE(inode->i_link); if (!link) { link = inode->i_op->get_link(dentry, inode, &done); if (IS_ERR(link)) From 1835402ece2f57054d35e2c615dd5686fb2fde65 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 30 Jul 2019 09:58:48 -0700 Subject: [PATCH 42/47] ubifs, fscrypt: cache decrypted symlink target in ->i_link Reapplied fscrypt patch in ubifs. Fixes: 1f1be4a771eb ("fscrypt: cache decrypted symlink target in ->i_link") Signed-off-by: Jaegeuk Kim --- fs/ubifs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 7c98ccca2c41..ebb9e847f6eb 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -276,6 +276,8 @@ static void ubifs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); struct ubifs_inode *ui = ubifs_inode(inode); + + fscrypt_free_inode(inode); kmem_cache_free(ubifs_inode_slab, ui); } From f62199a95c7f3680431b53a799c75cd2046b9cd4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Jul 2019 13:26:28 -0700 Subject: [PATCH 43/47] f2fs: use generic checking and prep function for FS_IOC_SETFLAGS Make the f2fs implementation of FS_IOC_SETFLAGS use the new VFS helper function vfs_ioc_setflags_prepare(). This is based on a patch from Darrick Wong, but reworked to apply after commit 360985573b55 ("f2fs: separate f2fs i_flags from fs_flags and ext4 i_flags"). Originally-from: Darrick J. Wong Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Reviewed-by: Darrick J. Wong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 ++++++++- fs/inode.c | 24 ++++++++++++++++++++++++ include/linux/fs.h | 3 +++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f8d46df8fa9e..b7aa0144d874 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1770,7 +1770,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - u32 fsflags; + struct f2fs_inode_info *fi = F2FS_I(inode); + u32 fsflags, old_fsflags; u32 iflags; int ret; @@ -1794,8 +1795,14 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) inode_lock(inode); + old_fsflags = f2fs_iflags_to_fsflags(fi->i_flags); + ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags); + if (ret) + goto out; + ret = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL)); +out: inode_unlock(inode); mnt_drop_write_file(filp); return ret; diff --git a/fs/inode.c b/fs/inode.c index 42f6d25f32a5..7aaa5fbff120 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2156,3 +2156,27 @@ struct timespec64 current_time(struct inode *inode) return timespec64_trunc(now, inode->i_sb->s_time_gran); } EXPORT_SYMBOL(current_time); + +/* + * Generic function to check FS_IOC_SETFLAGS values and reject any invalid + * configurations. + * + * Note: the caller should be holding i_mutex, or else be sure that they have + * exclusive access to the inode structure. + */ +int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, + unsigned int flags) +{ + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + * + * This test looks nicer. Thanks to Pauline Middelink + */ + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + return 0; +} +EXPORT_SYMBOL(vfs_ioc_setflags_prepare); diff --git a/include/linux/fs.h b/include/linux/fs.h index eecca742c89f..1e0b48d3f7e4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3453,4 +3453,7 @@ extern void inode_nohighmem(struct inode *inode); extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice); +int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, + unsigned int flags); + #endif /* _LINUX_FS_H */ From e831418555aad454620403ebe3b472db7603fbca Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Jul 2019 13:26:29 -0700 Subject: [PATCH 44/47] f2fs: use generic checking function for FS_IOC_FSSETXATTR Make the f2fs implementation of FS_IOC_FSSETXATTR use the new VFS helper function vfs_ioc_fssetxattr_check(), and remove the project quota check since it's now done by the helper function. This is based on a patch from Darrick Wong, but reworked to apply after commit 360985573b55 ("f2fs: separate f2fs i_flags from fs_flags and ext4 i_flags"). Originally-from: Darrick J. Wong Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Reviewed-by: Darrick J. Wong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 49 ++++++++++++------------------------ fs/inode.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 9 +++++++ 3 files changed, 87 insertions(+), 33 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b7aa0144d874..717bad8d31b0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2862,52 +2862,32 @@ static inline u32 f2fs_xflags_to_iflags(u32 xflags) return iflags; } +static void f2fs_fill_fsxattr(struct inode *inode, struct fsxattr *fa) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + simple_fill_fsxattr(fa, f2fs_iflags_to_xflags(fi->i_flags)); + + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) + fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid); +} + static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct f2fs_inode_info *fi = F2FS_I(inode); struct fsxattr fa; - memset(&fa, 0, sizeof(struct fsxattr)); - fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags); - - if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) - fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, - fi->i_projid); + f2fs_fill_fsxattr(inode, &fa); if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa))) return -EFAULT; return 0; } -static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa) -{ - /* - * Project Quota ID state is only allowed to change from within the init - * namespace. Enforce that restriction only if we are trying to change - * the quota ID state. Everything else is allowed in user namespaces. - */ - if (current_user_ns() == &init_user_ns) - return 0; - - if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid) - return -EINVAL; - - if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) { - if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT)) - return -EINVAL; - } else { - if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) - return -EINVAL; - } - - return 0; -} - static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct fsxattr fa; + struct fsxattr fa, old_fa; u32 iflags; int err; @@ -2930,9 +2910,12 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) return err; inode_lock(inode); - err = f2fs_ioctl_check_project(inode, &fa); + + f2fs_fill_fsxattr(inode, &old_fa); + err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); if (err) goto out; + err = f2fs_setflags_common(inode, iflags, f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS)); if (err) diff --git a/fs/inode.c b/fs/inode.c index 7aaa5fbff120..c2f67a1b0352 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2180,3 +2180,65 @@ int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, return 0; } EXPORT_SYMBOL(vfs_ioc_setflags_prepare); + +/* + * Generic function to check FS_IOC_FSSETXATTR values and reject any invalid + * configurations. + * + * Note: the caller should be holding i_mutex, or else be sure that they have + * exclusive access to the inode structure. + */ +int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa, + struct fsxattr *fa) +{ + /* + * Can't modify an immutable/append-only file unless we have + * appropriate permission. + */ + if ((old_fa->fsx_xflags ^ fa->fsx_xflags) & + (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() != &init_user_ns) { + if (old_fa->fsx_projid != fa->fsx_projid) + return -EINVAL; + if ((old_fa->fsx_xflags ^ fa->fsx_xflags) & + FS_XFLAG_PROJINHERIT) + return -EINVAL; + } + + /* Check extent size hints. */ + if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode)) + return -EINVAL; + + if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) && + !S_ISDIR(inode->i_mode)) + return -EINVAL; + + if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) && + !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + return -EINVAL; + + /* + * It is only valid to set the DAX flag on regular files and + * directories on filesystems. + */ + if ((fa->fsx_xflags & FS_XFLAG_DAX) && + !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) + return -EINVAL; + + /* Extent size hints of zero turn off the flags. */ + if (fa->fsx_extsize == 0) + fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); + if (fa->fsx_cowextsize == 0) + fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE; + + return 0; +} +EXPORT_SYMBOL(vfs_ioc_fssetxattr_check); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1e0b48d3f7e4..50afe1d28198 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3456,4 +3456,13 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, unsigned int flags); +int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa, + struct fsxattr *fa); + +static inline void simple_fill_fsxattr(struct fsxattr *fa, __u32 xflags) +{ + memset(fa, 0, sizeof(*fa)); + fa->fsx_xflags = xflags; +} + #endif /* _LINUX_FS_H */ From 01d352552af61360ad019592caf8b2cfc2df11c3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Jul 2019 13:26:30 -0700 Subject: [PATCH 45/47] f2fs: remove redundant check from f2fs_setflags_common() Now that f2fs_ioc_setflags() and f2fs_ioc_fssetxattr() call the VFS helper functions which check for permission to change the immutable and append-only flags, it's no longer needed to do this check in f2fs_setflags_common() too. So remove it. This is based on a patch from Darrick Wong, but reworked to apply after commit 360985573b55 ("f2fs: separate f2fs i_flags from fs_flags and ext4 i_flags"). Originally-from: Darrick J. Wong Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Reviewed-by: Darrick J. Wong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 717bad8d31b0..3e58a6f697dd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1653,19 +1653,12 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { struct f2fs_inode_info *fi = F2FS_I(inode); - u32 oldflags; /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - oldflags = fi->i_flags; - - if ((iflags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) - if (!capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - - fi->i_flags = iflags | (oldflags & ~mask); + fi->i_flags = iflags | (fi->i_flags & ~mask); if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); From b0dd52a56d77295fa54875204d8b4b5788919249 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Jul 2019 18:31:53 -0700 Subject: [PATCH 46/47] f2fs: fix to read source block before invalidating it f2fs_allocate_data_block() invalidates old block address and enable new block address. Then, if we try to read old block by f2fs_submit_page_bio(), it will give WARN due to reading invalid blocks. Let's make the order sanely back. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 70 +++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6691f526fa40..8974672db78f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -796,6 +796,29 @@ static int move_data_block(struct inode *inode, block_t bidx, if (lfs_mode) down_write(&fio.sbi->io_order_lock); + mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), + fio.old_blkaddr, false); + if (!mpage) + goto up_out; + + fio.encrypted_page = mpage; + + /* read source block in mpage */ + if (!PageUptodate(mpage)) { + err = f2fs_submit_page_bio(&fio); + if (err) { + f2fs_put_page(mpage, 1); + goto up_out; + } + lock_page(mpage); + if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || + !PageUptodate(mpage))) { + err = -EIO; + f2fs_put_page(mpage, 1); + goto up_out; + } + } + f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, CURSEG_COLD_DATA, NULL, false); @@ -803,44 +826,18 @@ static int move_data_block(struct inode *inode, block_t bidx, newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; + f2fs_put_page(mpage, 1); goto recover_block; } - mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), - fio.old_blkaddr, FGP_LOCK, GFP_NOFS); - if (mpage) { - bool updated = false; - - if (PageUptodate(mpage)) { - memcpy(page_address(fio.encrypted_page), - page_address(mpage), PAGE_SIZE); - updated = true; - } - f2fs_put_page(mpage, 1); - invalidate_mapping_pages(META_MAPPING(fio.sbi), - fio.old_blkaddr, fio.old_blkaddr); - if (updated) - goto write_page; - } - - err = f2fs_submit_page_bio(&fio); - if (err) - goto put_page_out; - - /* write page */ - lock_page(fio.encrypted_page); - - if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { - err = -EIO; - goto put_page_out; - } - if (unlikely(!PageUptodate(fio.encrypted_page))) { - err = -EIO; - goto put_page_out; - } - -write_page: + /* write target block */ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); + memcpy(page_address(fio.encrypted_page), + page_address(mpage), PAGE_SIZE); + f2fs_put_page(mpage, 1); + invalidate_mapping_pages(META_MAPPING(fio.sbi), + fio.old_blkaddr, fio.old_blkaddr); + set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) dec_page_count(fio.sbi, F2FS_DIRTY_META); @@ -871,11 +868,12 @@ write_page: put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: - if (lfs_mode) - up_write(&fio.sbi->io_order_lock); if (err) f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, true, true); +up_out: + if (lfs_mode) + up_write(&fio.sbi->io_order_lock); put_out: f2fs_put_dnode(&dn); out: From 94472d527e3b6d7f313349b01f0279da5a25bd6b Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 25 Jul 2019 11:08:52 +0800 Subject: [PATCH 47/47] f2fs: use EINVAL for superblock with invalid magic The kernel mount_block_root() function expects -EACESS or -EINVAL for a unmountable filesystem when trying to mount the root with different filesystem types. However, in 5.3-rc1 the behavior when F2FS code cannot find valid block changed to return -EFSCORRUPTED(-EUCLEAN), and this error code makes mount_block_root() fail when trying to probe F2FS. When the magic number of the superblock mismatches, it has a high probability that it's just not a F2FS. In this case return -EINVAL seems to be a better result, and this return value can make mount_block_root() probing work again. Return -EINVAL when the superblock has magic mismatch, -EFSCORRUPTED in other cases (the magic matches but the superblock cannot be recognized). Fixes: 10f966bbf521 ("f2fs: use generic EFSBADCRC/EFSCORRUPTED") Signed-off-by: Icenowy Zheng Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bda96629525d..f546d1bdae1b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2430,6 +2430,12 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, size_t crc_offset = 0; __u32 crc = 0; + if (le32_to_cpu(raw_super->magic) != F2FS_SUPER_MAGIC) { + f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)", + F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); + return -EINVAL; + } + /* Check checksum_offset and crc in superblock */ if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) { crc_offset = le32_to_cpu(raw_super->checksum_offset); @@ -2437,26 +2443,20 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, offsetof(struct f2fs_super_block, crc)) { f2fs_info(sbi, "Invalid SB checksum offset: %zu", crc_offset); - return 1; + return -EFSCORRUPTED; } crc = le32_to_cpu(raw_super->crc); if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { f2fs_info(sbi, "Invalid SB checksum value: %u", crc); - return 1; + return -EFSCORRUPTED; } } - if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { - f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)", - F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); - return 1; - } - /* Currently, support only 4KB page cache size */ if (F2FS_BLKSIZE != PAGE_SIZE) { f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB", PAGE_SIZE); - return 1; + return -EFSCORRUPTED; } /* Currently, support only 4KB block size */ @@ -2464,14 +2464,14 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (blocksize != F2FS_BLKSIZE) { f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", blocksize); - return 1; + return -EFSCORRUPTED; } /* check log blocks per segment */ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { f2fs_info(sbi, "Invalid log blocks per segment (%u)", le32_to_cpu(raw_super->log_blocks_per_seg)); - return 1; + return -EFSCORRUPTED; } /* Currently, support 512/1024/2048/4096 bytes sector size */ @@ -2481,7 +2481,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, F2FS_MIN_LOG_SECTOR_SIZE) { f2fs_info(sbi, "Invalid log sectorsize (%u)", le32_to_cpu(raw_super->log_sectorsize)); - return 1; + return -EFSCORRUPTED; } if (le32_to_cpu(raw_super->log_sectors_per_block) + le32_to_cpu(raw_super->log_sectorsize) != @@ -2489,7 +2489,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)", le32_to_cpu(raw_super->log_sectors_per_block), le32_to_cpu(raw_super->log_sectorsize)); - return 1; + return -EFSCORRUPTED; } segment_count = le32_to_cpu(raw_super->segment_count); @@ -2503,7 +2503,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (segment_count > F2FS_MAX_SEGMENT || segment_count < F2FS_MIN_SEGMENTS) { f2fs_info(sbi, "Invalid segment count (%u)", segment_count); - return 1; + return -EFSCORRUPTED; } if (total_sections > segment_count || @@ -2511,25 +2511,25 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, segs_per_sec > segment_count || !segs_per_sec) { f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)", segment_count, total_sections, segs_per_sec); - return 1; + return -EFSCORRUPTED; } if ((segment_count / segs_per_sec) < total_sections) { f2fs_info(sbi, "Small segment_count (%u < %u * %u)", segment_count, segs_per_sec, total_sections); - return 1; + return -EFSCORRUPTED; } if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)", segment_count, le64_to_cpu(raw_super->block_count)); - return 1; + return -EFSCORRUPTED; } if (secs_per_zone > total_sections || !secs_per_zone) { f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", secs_per_zone, total_sections); - return 1; + return -EFSCORRUPTED; } if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION || raw_super->hot_ext_count > F2FS_MAX_EXTENSION || @@ -2539,7 +2539,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, le32_to_cpu(raw_super->extension_count), raw_super->hot_ext_count, F2FS_MAX_EXTENSION); - return 1; + return -EFSCORRUPTED; } if (le32_to_cpu(raw_super->cp_payload) > @@ -2547,7 +2547,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, f2fs_info(sbi, "Insane cp_payload (%u > %u)", le32_to_cpu(raw_super->cp_payload), blocks_per_seg - F2FS_CP_PACKS); - return 1; + return -EFSCORRUPTED; } /* check reserved ino info */ @@ -2558,12 +2558,12 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, le32_to_cpu(raw_super->node_ino), le32_to_cpu(raw_super->meta_ino), le32_to_cpu(raw_super->root_ino)); - return 1; + return -EFSCORRUPTED; } /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ if (sanity_check_area_boundary(sbi, bh)) - return 1; + return -EFSCORRUPTED; return 0; } @@ -2880,10 +2880,10 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, } /* sanity checking of raw super */ - if (sanity_check_raw_super(sbi, bh)) { + err = sanity_check_raw_super(sbi, bh); + if (err) { f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - err = -EFSCORRUPTED; brelse(bh); continue; }