diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 94132745ecbe..8140fc98f5ae 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -190,12 +190,6 @@ Description: Controls the memory footprint used by free nids and cached nat entries. By default, 1 is set, which indicates 10 MB / 1 GB RAM. -What: /sys/fs/f2fs//batched_trim_sections -Date: February 2015 -Contact: "Jaegeuk Kim" -Description: Controls the trimming rate in batch mode. - - What: /sys/fs/f2fs//cp_interval Date: October 2015 Contact: "Jaegeuk Kim" @@ -729,3 +723,20 @@ What: /sys/fs/f2fs//last_age_weight Date: January 2023 Contact: "Ping Xiong" Description: When DATA SEPARATION is on, it controls the weight of last data block age. + +What: /sys/fs/f2fs//compress_watermark +Date: February 2023 +Contact: "Yangtao Li" +Description: When compress cache is on, it controls free memory watermark + in order to limit caching compress page. If free memory is lower + than watermark, then deny caching compress page. The value should be in + range of (0, 100], by default it was initialized as 20(%). + +What: /sys/fs/f2fs//compress_percent +Date: February 2023 +Contact: "Yangtao Li" +Description: When compress cache is on, it controls cached page + percent(compress pages / free_ram) in order to limit caching compress page. + If cached page percent exceed threshold, then deny caching compress page. + The value should be in range of (0, 100], by default it was initialized + as 20(%). diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 2055e72871fe..c57745375edb 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -264,7 +264,7 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enabl disabled, any unmounting or unexpected shutdowns will cause the filesystem contents to appear as they did when the filesystem was mounted with that option. - While mounting with checkpoint=disabled, the filesystem must + While mounting with checkpoint=disable, the filesystem must run garbage collection to ensure that all available space can be used. If this takes too much time, the mount may return EAGAIN. You may optionally add a value to indicate how much diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1369ec892a2c..7e0870eeb07b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -152,6 +152,11 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, se = get_seg_entry(sbi, segno); exist = f2fs_test_bit(offset, se->cur_valid_map); + + /* skip data, if we already have an error in checkpoint. */ + if (unlikely(f2fs_cp_error(sbi))) + return exist; + if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) { f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", blkaddr, exist); @@ -202,6 +207,11 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case DATA_GENERIC_ENHANCE_UPDATE: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) { + + /* Skip to emit an error message. */ + if (unlikely(f2fs_cp_error(sbi))) + return false; + f2fs_warn(sbi, "access invalid blkaddr:%u", blkaddr); set_sbi_flag(sbi, SBI_NEED_FSCK); @@ -325,8 +335,15 @@ static int __f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_META); + unlock_page(page); + return 0; + } goto redirty_out; + } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) @@ -505,6 +522,7 @@ retry: if (!e) { if (!new) { spin_unlock(&im->ino_lock); + radix_tree_preload_end(); goto retry; } e = new; @@ -703,32 +721,18 @@ err_out: int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) { block_t start_blk, orphan_blocks, i, j; - unsigned int s_flags = sbi->sb->s_flags; int err = 0; -#ifdef CONFIG_QUOTA - int quota_enabled; -#endif if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; - if (bdev_read_only(sbi->sb->s_bdev)) { + if (f2fs_hw_is_readonly(sbi)) { f2fs_info(sbi, "write access unavailable, skipping orphan cleanup"); return 0; } - if (s_flags & SB_RDONLY) { + if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) f2fs_info(sbi, "orphan cleanup on readonly fs"); - sbi->sb->s_flags &= ~SB_RDONLY; - } - -#ifdef CONFIG_QUOTA - /* - * Turn on quotas which were not enabled for read-only mounts if - * filesystem has quota feature, so that they are updated correctly. - */ - quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); -#endif start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); @@ -762,13 +766,6 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) out: set_sbi_flag(sbi, SBI_IS_RECOVERED); -#ifdef CONFIG_QUOTA - /* Turn quotas off */ - if (quota_enabled) - f2fs_quota_off_umount(sbi->sb); -#endif - sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ - return err; } @@ -979,7 +976,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) cp_blk_no = le32_to_cpu(fsb->cp_blkaddr); if (cur_page == cp2) - cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + cp_blk_no += BIT(le32_to_cpu(fsb->log_blocks_per_seg)); for (i = 1; i < cp_blks; i++) { void *sit_bitmap_ptr; @@ -1130,7 +1127,7 @@ retry: goto retry; } -int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) +static int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) { struct list_head *head = &sbi->inode_list[DIRTY_META]; struct inode *inode; @@ -1303,7 +1300,8 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) if (!get_pages(sbi, type)) break; - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi) && + !is_sbi_flag_set(sbi, SBI_IS_CLOSE))) break; if (type == F2FS_DIRTY_META) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 9a4eedab236e..11653fa79289 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -264,35 +264,21 @@ static void lz4_destroy_compress_ctx(struct compress_ctx *cc) cc->private = NULL; } -#ifdef CONFIG_F2FS_FS_LZ4HC -static int lz4hc_compress_pages(struct compress_ctx *cc) -{ - unsigned char level = F2FS_I(cc->inode)->i_compress_level; - int len; - - if (level) - len = LZ4_compress_HC(cc->rbuf, cc->cbuf->cdata, cc->rlen, - cc->clen, level, cc->private); - else - len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen, - cc->clen, cc->private); - if (!len) - return -EAGAIN; - - cc->clen = len; - return 0; -} -#endif - static int lz4_compress_pages(struct compress_ctx *cc) { - int len; + int len = -EINVAL; + unsigned char level = F2FS_I(cc->inode)->i_compress_level; -#ifdef CONFIG_F2FS_FS_LZ4HC - return lz4hc_compress_pages(cc); -#endif - len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen, + if (!level) + len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen, cc->clen, cc->private); +#ifdef CONFIG_F2FS_FS_LZ4HC + else + len = LZ4_compress_HC(cc->rbuf, cc->cbuf->cdata, cc->rlen, + cc->clen, level, cc->private); +#endif + if (len < 0) + return len; if (!len) return -EAGAIN; @@ -670,7 +656,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->cbuf->clen = cpu_to_le32(cc->clen); - if (fi->i_compress_flag & 1 << COMPRESS_CHKSUM) + if (fi->i_compress_flag & BIT(COMPRESS_CHKSUM)) chksum = f2fs_crc32(F2FS_I_SB(cc->inode), cc->cbuf->cdata, cc->clen); cc->cbuf->chksum = cpu_to_le32(chksum); @@ -766,7 +752,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) ret = cops->decompress_pages(dic); - if (!ret && (fi->i_compress_flag & 1 << COMPRESS_CHKSUM)) { + if (!ret && (fi->i_compress_flag & BIT(COMPRESS_CHKSUM))) { u32 provided = le32_to_cpu(dic->cbuf->chksum); u32 calculated = f2fs_crc32(sbi, dic->cbuf->cdata, dic->clen); @@ -1461,6 +1447,12 @@ continue_unlock: if (!PageDirty(cc->rpages[i])) goto continue_unlock; + if (PageWriteback(cc->rpages[i])) { + if (wbc->sync_mode == WB_SYNC_NONE) + goto continue_unlock; + f2fs_wait_on_page_writeback(cc->rpages[i], DATA, true, true); + } + if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2cdefb3ab7af..8260f4b63fae 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -94,17 +94,17 @@ static enum count_type __read_io_type(struct page *page) /* postprocessing steps for read bios */ enum bio_post_read_step { #ifdef CONFIG_FS_ENCRYPTION - STEP_DECRYPT = 1 << 0, + STEP_DECRYPT = BIT(0), #else STEP_DECRYPT = 0, /* compile out the decryption-related code */ #endif #ifdef CONFIG_F2FS_FS_COMPRESSION - STEP_DECOMPRESS = 1 << 1, + STEP_DECOMPRESS = BIT(1), #else STEP_DECOMPRESS = 0, /* compile out the decompression-related code */ #endif #ifdef CONFIG_FS_VERITY - STEP_VERITY = 1 << 2, + STEP_VERITY = BIT(2), #else STEP_VERITY = 0, /* compile out the verity-related code */ #endif @@ -421,7 +421,7 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) { - unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1; + unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0); unsigned int fua_flag, meta_flag, io_flag; blk_opf_t op_flags = 0; @@ -443,9 +443,9 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) * 5 | 4 | 3 | 2 | 1 | 0 | * Cold | Warm | Hot | Cold | Warm | Hot | */ - if ((1 << fio->temp) & meta_flag) + if (BIT(fio->temp) & meta_flag) op_flags |= REQ_META; - if ((1 << fio->temp) & fua_flag) + if (BIT(fio->temp) & fua_flag) op_flags |= REQ_FUA; return op_flags; } @@ -879,6 +879,8 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, bool found = false; struct bio *target = bio ? *bio : NULL; + f2fs_bug_on(sbi, !target && !page); + for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; struct list_head *head = &io->bio_list; @@ -2252,6 +2254,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (ret) goto out; + if (unlikely(f2fs_cp_error(sbi))) { + ret = -EIO; + goto out_put_dnode; + } f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR); skip_reading_dnode: @@ -2815,7 +2821,8 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, * don't drop any dirty dentry pages for keeping lastest * directory structure. */ - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode) && + !is_sbi_flag_set(sbi, SBI_IS_CLOSE)) goto redirty_out; goto out; } @@ -2915,7 +2922,8 @@ out: if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_write(sbi, DATA); - f2fs_submit_merged_ipu_write(sbi, bio, NULL); + if (bio && *bio) + f2fs_submit_merged_ipu_write(sbi, bio, NULL); submitted = NULL; } @@ -3110,11 +3118,9 @@ continue_unlock: } if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) - f2fs_wait_on_page_writeback(page, - DATA, true, true); - else + if (wbc->sync_mode == WB_SYNC_NONE) goto continue_unlock; + f2fs_wait_on_page_writeback(page, DATA, true, true); } if (!clear_page_dirty_for_io(page)) @@ -3471,7 +3477,7 @@ unlock_out: static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, struct page *page, loff_t pos, unsigned int len, - block_t *blk_addr, bool *node_changed) + block_t *blk_addr, bool *node_changed, bool *use_cow) { struct inode *inode = page->mapping->host; struct inode *cow_inode = F2FS_I(inode)->cow_inode; @@ -3485,10 +3491,12 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, /* Look for the block in COW inode first */ err = __find_data_block(cow_inode, index, blk_addr); - if (err) + if (err) { return err; - else if (*blk_addr != NULL_ADDR) + } else if (*blk_addr != NULL_ADDR) { + *use_cow = true; return 0; + } if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE)) goto reserve_block; @@ -3518,6 +3526,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct page *page = NULL; pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; bool need_balance = false; + bool use_cow = false; block_t blkaddr = NULL_ADDR; int err = 0; @@ -3577,7 +3586,7 @@ repeat: if (f2fs_is_atomic_file(inode)) err = prepare_atomic_write_begin(sbi, page, pos, len, - &blkaddr, &need_balance); + &blkaddr, &need_balance, &use_cow); else err = prepare_write_begin(sbi, page, pos, len, &blkaddr, &need_balance); @@ -3617,7 +3626,9 @@ repeat: f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto fail; } - err = f2fs_submit_page_read(inode, page, blkaddr, 0, true); + err = f2fs_submit_page_read(use_cow ? + F2FS_I(inode)->cow_inode : inode, page, + blkaddr, 0, true); if (err) goto fail; @@ -3710,37 +3721,16 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length) f2fs_remove_dirty_inode(inode); } } - - clear_page_private_reference(&folio->page); - clear_page_private_gcing(&folio->page); - - if (test_opt(sbi, COMPRESS_CACHE) && - inode->i_ino == F2FS_COMPRESS_INO(sbi)) - clear_page_private_data(&folio->page); - - folio_detach_private(folio); + clear_page_private_all(&folio->page); } bool f2fs_release_folio(struct folio *folio, gfp_t wait) { - struct f2fs_sb_info *sbi; - /* If this is dirty folio, keep private data */ if (folio_test_dirty(folio)) return false; - sbi = F2FS_M_SB(folio->mapping); - if (test_opt(sbi, COMPRESS_CACHE)) { - struct inode *inode = folio->mapping->host; - - if (inode->i_ino == F2FS_COMPRESS_INO(sbi)) - clear_page_private_data(&folio->page); - } - - clear_page_private_reference(&folio->page); - clear_page_private_gcing(&folio->page); - - folio_detach_private(folio); + clear_page_private_all(&folio->page); return true; } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 30a77936e3c5..61c35b59126e 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -336,22 +336,23 @@ get_cache: #endif } -static char *s_flag[] = { - [SBI_IS_DIRTY] = " fs_dirty", - [SBI_IS_CLOSE] = " closing", - [SBI_NEED_FSCK] = " need_fsck", - [SBI_POR_DOING] = " recovering", - [SBI_NEED_SB_WRITE] = " sb_dirty", - [SBI_NEED_CP] = " need_cp", - [SBI_IS_SHUTDOWN] = " shutdown", - [SBI_IS_RECOVERED] = " recovered", - [SBI_CP_DISABLED] = " cp_disabled", - [SBI_CP_DISABLED_QUICK] = " cp_disabled_quick", - [SBI_QUOTA_NEED_FLUSH] = " quota_need_flush", - [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush", - [SBI_QUOTA_NEED_REPAIR] = " quota_need_repair", - [SBI_IS_RESIZEFS] = " resizefs", - [SBI_IS_FREEZING] = " freezefs", +static const char *s_flag[MAX_SBI_FLAG] = { + [SBI_IS_DIRTY] = "fs_dirty", + [SBI_IS_CLOSE] = "closing", + [SBI_NEED_FSCK] = "need_fsck", + [SBI_POR_DOING] = "recovering", + [SBI_NEED_SB_WRITE] = "sb_dirty", + [SBI_NEED_CP] = "need_cp", + [SBI_IS_SHUTDOWN] = "shutdown", + [SBI_IS_RECOVERED] = "recovered", + [SBI_CP_DISABLED] = "cp_disabled", + [SBI_CP_DISABLED_QUICK] = "cp_disabled_quick", + [SBI_QUOTA_NEED_FLUSH] = "quota_need_flush", + [SBI_QUOTA_SKIP_FLUSH] = "quota_skip_flush", + [SBI_QUOTA_NEED_REPAIR] = "quota_need_repair", + [SBI_IS_RESIZEFS] = "resizefs", + [SBI_IS_FREEZING] = "freezefs", + [SBI_IS_WRITABLE] = "writable", }; static const char *ipu_mode_names[F2FS_IPU_MAX] = { @@ -384,8 +385,8 @@ static int stat_show(struct seq_file *s, void *v) "Disabled" : (f2fs_cp_error(sbi) ? "Error" : "Good")); if (sbi->s_flag) { seq_puts(s, "[SBI:"); - for_each_set_bit(j, &sbi->s_flag, 32) - seq_puts(s, s_flag[j]); + for_each_set_bit(j, &sbi->s_flag, MAX_SBI_FLAG) + seq_printf(s, " %s", s_flag[j]); seq_puts(s, "]\n"); } seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9ccdbe120425..887e55988450 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -29,7 +29,7 @@ static unsigned long dir_blocks(struct inode *inode) static unsigned int dir_buckets(unsigned int level, int dir_level) { if (level + dir_level < MAX_DIR_HASH_DEPTH / 2) - return 1 << (level + dir_level); + return BIT(level + dir_level); else return MAX_DIR_BUCKETS; } @@ -42,39 +42,6 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } -static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { - [F2FS_FT_UNKNOWN] = DT_UNKNOWN, - [F2FS_FT_REG_FILE] = DT_REG, - [F2FS_FT_DIR] = DT_DIR, - [F2FS_FT_CHRDEV] = DT_CHR, - [F2FS_FT_BLKDEV] = DT_BLK, - [F2FS_FT_FIFO] = DT_FIFO, - [F2FS_FT_SOCK] = DT_SOCK, - [F2FS_FT_SYMLINK] = DT_LNK, -}; - -static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, - [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, -}; - -static void set_de_type(struct f2fs_dir_entry *de, umode_t mode) -{ - de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; -} - -unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de) -{ - if (de->file_type < F2FS_FT_MAX) - return f2fs_filetype_table[de->file_type]; - return DT_UNKNOWN; -} - /* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */ int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname) @@ -485,7 +452,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, lock_page(page); f2fs_wait_on_page_writeback(page, type, true, true); de->ino = cpu_to_le32(inode->i_ino); - set_de_type(de, inode->i_mode); + de->file_type = fs_umode_to_ftype(inode->i_mode); set_page_dirty(page); dir->i_mtime = dir->i_ctime = current_time(dir); @@ -699,7 +666,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, de->name_len = cpu_to_le16(name->len); memcpy(d->filename[bit_pos], name->name, name->len); de->ino = cpu_to_le32(ino); - set_de_type(de, mode); + de->file_type = fs_umode_to_ftype(mode); for (i = 0; i < slots; i++) { __set_bit_le(bit_pos + i, (void *)d->bitmap); /* avoid wrong garbage data for readdir */ @@ -938,14 +905,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_clear_page_cache_dirty_tag(page); clear_page_dirty_for_io(page); ClearPageUptodate(page); - - clear_page_private_gcing(page); + clear_page_private_all(page); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); - - detach_page_private(page); - set_page_private(page, 0); } f2fs_put_page(page, 1); @@ -1036,7 +999,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, continue; } - d_type = f2fs_get_de_type(de); + d_type = fs_ftype_to_dtype(de->file_type); de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 9a8153895d20..0e2d49140c07 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -23,18 +23,26 @@ bool sanity_check_extent_cache(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + struct extent_tree *et = fi->extent_tree[EX_READ]; struct extent_info *ei; - if (!fi->extent_tree[EX_READ]) + if (!et) return true; - ei = &fi->extent_tree[EX_READ]->largest; + ei = &et->largest; + if (!ei->len) + return true; - if (ei->len && - (!f2fs_is_valid_blkaddr(sbi, ei->blk, - DATA_GENERIC_ENHANCE) || - !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, - DATA_GENERIC_ENHANCE))) { + /* Let's drop, if checkpoint got corrupted. */ + if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) { + ei->len = 0; + et->largest_updated = true; + return true; + } + + if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) || + !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, + DATA_GENERIC_ENHANCE)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", __func__, inode->i_ino, @@ -86,7 +94,6 @@ static bool __may_age_extent_tree(struct inode *inode) if (!test_opt(sbi, AGE_EXTENT_CACHE)) return false; - /* don't cache block age info for cold file */ if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) return false; if (file_is_cold(inode)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f9e607bc2cfe..911bea200217 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -65,7 +65,7 @@ enum { }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1) +#define F2FS_ALL_FAULT_TYPE (GENMASK(FAULT_MAX - 1, 0)) struct f2fs_fault_info { atomic_t inject_ops; @@ -74,7 +74,7 @@ struct f2fs_fault_info { }; extern const char *f2fs_fault_name[FAULT_MAX]; -#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) +#define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type)) #endif /* @@ -843,7 +843,7 @@ struct f2fs_inode_info { kprojid_t i_projid; /* id for project quota */ int i_inline_xattr_size; /* inline xattr size */ struct timespec64 i_crtime; /* inode creation time */ - struct timespec64 i_disk_time[4];/* inode disk times */ + struct timespec64 i_disk_time[3];/* inode disk times */ /* for file compress */ atomic_t i_compr_blocks; /* # of compressed blocks */ @@ -1274,7 +1274,10 @@ struct f2fs_gc_control { unsigned int nr_free_secs; /* # of free sections to do GC */ }; -/* For s_flag in struct f2fs_sb_info */ +/* + * For s_flag in struct f2fs_sb_info + * Modification on enum should be synchronized with s_flag array + */ enum { SBI_IS_DIRTY, /* dirty flag for checkpoint */ SBI_IS_CLOSE, /* specify unmounting */ @@ -1291,6 +1294,8 @@ enum { SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ SBI_IS_RESIZEFS, /* resizefs is in process */ SBI_IS_FREEZING, /* freezefs is in process */ + SBI_IS_WRITABLE, /* remove ro mountoption transiently */ + MAX_SBI_FLAG, }; enum { @@ -1393,86 +1398,6 @@ enum { PAGE_PRIVATE_MAX }; -#define PAGE_PRIVATE_GET_FUNC(name, flagname) \ -static inline bool page_private_##name(struct page *page) \ -{ \ - return PagePrivate(page) && \ - test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \ - test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ -} - -#define PAGE_PRIVATE_SET_FUNC(name, flagname) \ -static inline void set_page_private_##name(struct page *page) \ -{ \ - if (!PagePrivate(page)) { \ - get_page(page); \ - SetPagePrivate(page); \ - set_page_private(page, 0); \ - } \ - set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \ - set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ -} - -#define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \ -static inline void clear_page_private_##name(struct page *page) \ -{ \ - clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ - if (page_private(page) == 1 << PAGE_PRIVATE_NOT_POINTER) { \ - set_page_private(page, 0); \ - if (PagePrivate(page)) { \ - ClearPagePrivate(page); \ - put_page(page); \ - }\ - } \ -} - -PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); -PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); -PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE); - -PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); -PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); -PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE); - -PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); -PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); -PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE); - -static inline unsigned long get_page_private_data(struct page *page) -{ - unsigned long data = page_private(page); - - if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data)) - return 0; - return data >> PAGE_PRIVATE_MAX; -} - -static inline void set_page_private_data(struct page *page, unsigned long data) -{ - if (!PagePrivate(page)) { - get_page(page); - SetPagePrivate(page); - set_page_private(page, 0); - } - set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); - page_private(page) |= data << PAGE_PRIVATE_MAX; -} - -static inline void clear_page_private_data(struct page *page) -{ - page_private(page) &= (1 << PAGE_PRIVATE_MAX) - 1; - if (page_private(page) == 1 << PAGE_PRIVATE_NOT_POINTER) { - set_page_private(page, 0); - if (PagePrivate(page)) { - ClearPagePrivate(page); - put_page(page); - } - } -} - /* For compression */ enum compress_algorithm_type { COMPRESS_LZO, @@ -1598,7 +1523,6 @@ struct f2fs_sb_info { #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ - unsigned int log_blocks_per_blkz; /* log2 F2FS blocks per zone */ #endif /* for node-related operations */ @@ -2367,6 +2291,80 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...); #define f2fs_debug(sbi, fmt, ...) \ f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__) +#define PAGE_PRIVATE_GET_FUNC(name, flagname) \ +static inline bool page_private_##name(struct page *page) \ +{ \ + return PagePrivate(page) && \ + test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \ + test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ +} + +#define PAGE_PRIVATE_SET_FUNC(name, flagname) \ +static inline void set_page_private_##name(struct page *page) \ +{ \ + if (!PagePrivate(page)) \ + attach_page_private(page, (void *)0); \ + set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \ + set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ +} + +#define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \ +static inline void clear_page_private_##name(struct page *page) \ +{ \ + clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \ + if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) \ + detach_page_private(page); \ +} + +PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); +PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); +PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE); + +PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); +PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); +PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE); + +PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); +PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); +PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE); + +static inline unsigned long get_page_private_data(struct page *page) +{ + unsigned long data = page_private(page); + + if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data)) + return 0; + return data >> PAGE_PRIVATE_MAX; +} + +static inline void set_page_private_data(struct page *page, unsigned long data) +{ + if (!PagePrivate(page)) + attach_page_private(page, (void *)0); + set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); + page_private(page) |= data << PAGE_PRIVATE_MAX; +} + +static inline void clear_page_private_data(struct page *page) +{ + page_private(page) &= GENMASK(PAGE_PRIVATE_MAX - 1, 0); + if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) + detach_page_private(page); +} + +static inline void clear_page_private_all(struct page *page) +{ + clear_page_private_data(page); + clear_page_private_reference(page); + clear_page_private_gcing(page); + clear_page_private_inline(page); + + f2fs_bug_on(F2FS_P_SB(page), page_private(page)); +} + static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) @@ -2873,7 +2871,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) int mask; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); return mask & *addr; } @@ -2882,7 +2880,7 @@ static inline void f2fs_set_bit(unsigned int nr, char *addr) int mask; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); *addr |= mask; } @@ -2891,7 +2889,7 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr) int mask; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); *addr &= ~mask; } @@ -2901,7 +2899,7 @@ static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) int ret; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); ret = mask & *addr; *addr |= mask; return ret; @@ -2913,7 +2911,7 @@ static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr) int ret; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); ret = mask & *addr; *addr &= ~mask; return ret; @@ -2924,7 +2922,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) int mask; addr += (nr >> 3); - mask = 1 << (7 - (nr & 0x07)); + mask = BIT(7 - (nr & 0x07)); *addr ^= mask; } @@ -3288,9 +3286,6 @@ static inline bool f2fs_is_time_consistent(struct inode *inode) return false; if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3, - &F2FS_I(inode)->i_crtime)) - return false; return true; } @@ -3351,6 +3346,19 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, return kmalloc(size, flags); } +static inline void *f2fs_getname(struct f2fs_sb_info *sbi) +{ + if (time_to_inject(sbi, FAULT_KMALLOC)) + return NULL; + + return __getname(); +} + +static inline void f2fs_putname(char *buf) +{ + __putname(buf); +} + static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { @@ -3470,7 +3478,6 @@ int f2fs_get_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, /* * dir.c */ -unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de); int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname); int f2fs_setup_filename(struct inode *dir, const struct qstr *iname, @@ -3719,7 +3726,6 @@ void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, unsigned int devidx, int type); bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, unsigned int devidx, int type); -int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi); void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi); void f2fs_add_orphan_inode(struct inode *inode); @@ -3812,6 +3818,10 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); int __init f2fs_create_garbage_collection_cache(void); void f2fs_destroy_garbage_collection_cache(void); +/* victim selection function for cleaning and SSR */ +int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, + int gc_type, int type, char alloc_mode, + unsigned long long age); /* * recovery.c @@ -4319,9 +4329,9 @@ static inline int set_compress_context(struct inode *inode) F2FS_OPTION(sbi).compress_log_size; F2FS_I(inode)->i_compress_flag = F2FS_OPTION(sbi).compress_chksum ? - 1 << COMPRESS_CHKSUM : 0; + BIT(COMPRESS_CHKSUM) : 0; F2FS_I(inode)->i_cluster_size = - 1 << F2FS_I(inode)->i_log_cluster_size; + BIT(F2FS_I(inode)->i_log_cluster_size); if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 || F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) && F2FS_OPTION(sbi).compress_level) @@ -4379,7 +4389,7 @@ F2FS_FEATURE_FUNCS(readonly, RO); static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, block_t blkaddr) { - unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz; + unsigned int zno = blkaddr / sbi->blocks_per_blkz; return test_bit(zno, FDEV(devi).blkz_seq); } @@ -4427,6 +4437,11 @@ static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi) return false; } +static inline bool f2fs_dev_is_readonly(struct f2fs_sb_info *sbi) +{ + return f2fs_sb_has_readonly(sbi) || f2fs_hw_is_readonly(sbi); +} + static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) { return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a053bde9096a..1b7a3defba4a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2113,7 +2113,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); } else { /* Reuse the already created COW inode */ - f2fs_do_truncate_blocks(fi->cow_inode, 0, true); + ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); + if (ret) { + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + goto out; + } } f2fs_write_inode(inode, NULL); @@ -3009,15 +3013,16 @@ int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid) struct dquot *transfer_to[MAXQUOTAS] = {}; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; - int err = 0; + int err; transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); - if (!IS_ERR(transfer_to[PRJQUOTA])) { - err = __dquot_transfer(inode, transfer_to); - if (err) - set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - dqput(transfer_to[PRJQUOTA]); - } + if (IS_ERR(transfer_to[PRJQUOTA])) + return PTR_ERR(transfer_to[PRJQUOTA]); + + err = __dquot_transfer(inode, transfer_to); + if (err) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + dqput(transfer_to[PRJQUOTA]); return err; } @@ -3964,7 +3969,7 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) F2FS_I(inode)->i_compress_algorithm = option.algorithm; F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size; - F2FS_I(inode)->i_cluster_size = 1 << option.log_cluster_size; + F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size); f2fs_mark_inode_dirty_sync(inode, true); if (!f2fs_is_compress_backend_ready(inode)) @@ -4062,8 +4067,11 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (ret < 0) break; - if (get_dirty_pages(inode) >= blk_per_seg) - filemap_fdatawrite(inode->i_mapping); + if (get_dirty_pages(inode) >= blk_per_seg) { + ret = filemap_fdatawrite(inode->i_mapping); + if (ret < 0) + break; + } count -= len; page_idx += len; @@ -4133,8 +4141,11 @@ static int f2fs_ioc_compress_file(struct file *filp) if (ret < 0) break; - if (get_dirty_pages(inode) >= blk_per_seg) - filemap_fdatawrite(inode->i_mapping); + if (get_dirty_pages(inode) >= blk_per_seg) { + ret = filemap_fdatawrite(inode->i_mapping); + if (ret < 0) + break; + } count -= len; page_idx += len; @@ -4361,7 +4372,7 @@ static void f2fs_trace_rw_file_path(struct kiocb *iocb, size_t count, int rw) struct inode *inode = file_inode(iocb->ki_filp); char *buf, *path; - buf = f2fs_kmalloc(F2FS_I_SB(inode), PATH_MAX, GFP_KERNEL); + buf = f2fs_getname(F2FS_I_SB(inode)); if (!buf) return; path = dentry_path_raw(file_dentry(iocb->ki_filp), buf, PATH_MAX); @@ -4374,7 +4385,7 @@ static void f2fs_trace_rw_file_path(struct kiocb *iocb, size_t count, int rw) trace_f2fs_dataread_start(inode, iocb->ki_pos, count, current->pid, path, current->comm); free_buf: - kfree(buf); + f2fs_putname(buf); } static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -4534,6 +4545,19 @@ static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { .end_io = f2fs_dio_write_end_io, }; +static void f2fs_flush_buffered_write(struct address_space *mapping, + loff_t start_pos, loff_t end_pos) +{ + int ret; + + ret = filemap_write_and_wait_range(mapping, start_pos, end_pos); + if (ret < 0) + return; + invalidate_mapping_pages(mapping, + start_pos >> PAGE_SHIFT, + end_pos >> PAGE_SHIFT); +} + static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, bool *may_need_sync) { @@ -4633,14 +4657,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, ret += ret2; - ret2 = filemap_write_and_wait_range(file->f_mapping, - bufio_start_pos, - bufio_end_pos); - if (ret2 < 0) - goto out; - invalidate_mapping_pages(file->f_mapping, - bufio_start_pos >> PAGE_SHIFT, - bufio_end_pos >> PAGE_SHIFT); + f2fs_flush_buffered_write(file->f_mapping, + bufio_start_pos, + bufio_end_pos); } } else { /* iomap_dio_rw() already handled the generic_write_sync(). */ @@ -4723,8 +4742,18 @@ out_unlock: inode_unlock(inode); out: trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); + if (ret > 0 && may_need_sync) ret = generic_write_sync(iocb, ret); + + /* If buffered IO was forced, flush and drop the data from + * the page cache to preserve O_DIRECT semantics + */ + if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT)) + f2fs_flush_buffered_write(iocb->ki_filp->f_mapping, + orig_pos, + orig_pos + ret - 1); + return ret; } @@ -4879,6 +4908,7 @@ const struct file_operations f2fs_file_operations = { .llseek = f2fs_llseek, .read_iter = f2fs_file_read_iter, .write_iter = f2fs_file_write_iter, + .iopoll = iocb_bio_iopoll, .open = f2fs_file_open, .release = f2fs_release_file, .mmap = f2fs_file_mmap, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index adef513aac33..30f78b834573 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -741,9 +741,9 @@ static int f2fs_gc_pinned_control(struct inode *inode, int gc_type, * When it is called from SSR segment selection, it finds a segment * which has minimum valid blocks and removes it from dirty seglist. */ -static int get_victim_by_default(struct f2fs_sb_info *sbi, - unsigned int *result, int gc_type, int type, - char alloc_mode, unsigned long long age) +int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, + int gc_type, int type, char alloc_mode, + unsigned long long age) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct sit_info *sm = SIT_I(sbi); @@ -937,10 +937,6 @@ out: return ret; } -static const struct victim_selection default_v_ops = { - .get_victim = get_victim_by_default, -}; - static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) { struct inode_entry *ie; @@ -1620,14 +1616,14 @@ next_step: int err; if (S_ISREG(inode->i_mode)) { - if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[READ])) { + if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[WRITE])) { sbi->skipped_gc_rwsem++; continue; } if (!f2fs_down_write_trylock( - &fi->i_gc_rwsem[WRITE])) { + &fi->i_gc_rwsem[READ])) { sbi->skipped_gc_rwsem++; - f2fs_up_write(&fi->i_gc_rwsem[READ]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); continue; } locked = true; @@ -1650,8 +1646,8 @@ next_step: submitted++; if (locked) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); f2fs_up_write(&fi->i_gc_rwsem[READ]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); } stat_inc_data_blk_count(sbi, 1, gc_type); @@ -1671,8 +1667,7 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, int ret; down_write(&sit_i->sentry_lock); - ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, - NO_CHECK_TYPE, LFS, 0); + ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS, 0); up_write(&sit_i->sentry_lock); return ret; } @@ -1810,6 +1805,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; unsigned int skipped_round = 0, round = 0; + unsigned int upper_secs; trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc, gc_control->nr_free_secs, @@ -1822,8 +1818,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) prefree_segments(sbi)); cpc.reason = __get_cp_reason(sbi); - sbi->skipped_gc_rwsem = 0; gc_more: + sbi->skipped_gc_rwsem = 0; if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) { ret = -EINVAL; goto stop; @@ -1833,7 +1829,10 @@ gc_more: goto stop; } - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) { + /* Let's run FG_GC, if we don't have enough space. */ + if (has_not_enough_free_secs(sbi, 0, 0)) { + gc_type = FG_GC; + /* * For example, if there are many prefree_segments below given * threshold, we can make them free by checkpoint. Then, we @@ -1844,8 +1843,6 @@ gc_more: if (ret) goto stop; } - if (has_not_enough_free_secs(sbi, 0, 0)) - gc_type = FG_GC; } /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */ @@ -1872,19 +1869,15 @@ retry: if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) sec_freed++; - if (gc_type == FG_GC) + if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; - if (gc_control->init_gc_type == FG_GC || - !has_not_enough_free_secs(sbi, - (gc_type == FG_GC) ? sec_freed : 0, 0)) { - if (gc_type == FG_GC && sec_freed < gc_control->nr_free_secs) - goto go_gc_more; - goto stop; - } - - /* FG_GC stops GC by skip_count */ - if (gc_type == FG_GC) { + if (has_enough_free_secs(sbi, sec_freed, 0)) { + if (!gc_control->no_bg_gc && + sec_freed < gc_control->nr_free_secs) + goto go_gc_more; + goto stop; + } if (sbi->skipped_gc_rwsem) skipped_round++; round++; @@ -1893,10 +1886,17 @@ retry: ret = f2fs_write_checkpoint(sbi, &cpc); goto stop; } + } else if (has_enough_free_secs(sbi, 0, 0)) { + goto stop; } - /* Write checkpoint to reclaim prefree segments */ - if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE && + __get_secs_required(sbi, NULL, &upper_secs, NULL); + + /* + * Write checkpoint to reclaim prefree segments. + * We need more three extra sections for writer's data/node/dentry. + */ + if (free_sections(sbi) <= upper_secs + NR_GC_CHECKPOINT_SECS && prefree_segments(sbi)) { ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) @@ -1963,8 +1963,6 @@ static void init_atgc_management(struct f2fs_sb_info *sbi) void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) { - DIRTY_I(sbi)->v_ops = &default_v_ops; - sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; /* give warm/cold data area from slower device */ @@ -2095,8 +2093,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) FDEV(last_dev).end_blk = (long long)FDEV(last_dev).end_blk + blks; #ifdef CONFIG_BLK_DEV_ZONED - FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz + - (int)(blks >> sbi->log_blocks_per_blkz); + FDEV(last_dev).nr_blkz = FDEV(last_dev).nr_blkz + + div_u64(blks, sbi->blocks_per_blkz); #endif } } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 5ad6ac63e13f..28a00942802c 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -30,6 +30,8 @@ /* Search max. number of dirty segments to select a victim segment */ #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ +#define NR_GC_CHECKPOINT_SECS (3) /* data/node/dentry sections */ + struct f2fs_gc_kthread { struct task_struct *f2fs_gc_task; wait_queue_head_t gc_wait_queue_head; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 72269e7efd26..4638fee16a91 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -497,7 +497,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) fname.hash = de->hash_code; ino = le32_to_cpu(de->ino); - fake_mode = f2fs_get_de_type(de) << S_SHIFT; + fake_mode = fs_ftype_to_dtype(de->file_type) << S_DT_SHIFT; err = f2fs_add_regular_entry(dir, &fname, NULL, ino, fake_mode); if (err) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 7d2e2c0dba65..cf4327ad106c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -326,7 +326,6 @@ static void init_idisk_time(struct inode *inode) fi->i_disk_time[0] = inode->i_atime; fi->i_disk_time[1] = inode->i_ctime; fi->i_disk_time[2] = inode->i_mtime; - fi->i_disk_time[3] = fi->i_crtime; } static int do_read_inode(struct inode *inode) @@ -454,8 +453,8 @@ static int do_read_inode(struct inode *inode) fi->i_compress_level = compress_flag >> COMPRESS_LEVEL_OFFSET; fi->i_compress_flag = compress_flag & - (BIT(COMPRESS_LEVEL_OFFSET) - 1); - fi->i_cluster_size = 1 << fi->i_log_cluster_size; + GENMASK(COMPRESS_LEVEL_OFFSET - 1, 0); + fi->i_cluster_size = BIT(fi->i_log_cluster_size); set_inode_flag(inode, FI_COMPRESSED_FILE); } } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c76ca49f97b2..535778ce769d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -22,7 +22,7 @@ #include "acl.h" #include -static inline int is_extension_exist(const unsigned char *s, const char *sub, +static inline bool is_extension_exist(const unsigned char *s, const char *sub, bool tmp_ext) { size_t slen = strlen(s); @@ -30,19 +30,19 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub, int i; if (sublen == 1 && *sub == '*') - return 1; + return true; /* * filename format of multimedia file should be defined as: * "filename + '.' + extension + (optional: '.' + temp extension)". */ if (slen < sublen + 2) - return 0; + return false; if (!tmp_ext) { /* file has no temp extension */ if (s[slen - sublen - 1] != '.') - return 0; + return false; return !strncasecmp(s + slen - sublen, sub, sublen); } @@ -50,10 +50,10 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub, if (s[i] != '.') continue; if (!strncasecmp(s + i + 1, sub, sublen)) - return 1; + return true; } - return 0; + return false; } int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, @@ -995,12 +995,20 @@ static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, goto out; } + /* + * Copied from ext4_rename: we need to protect against old.inode + * directory getting converted from inline directory format into + * a normal one. + */ + if (S_ISDIR(old_inode->i_mode)) + inode_lock_nested(old_inode, I_MUTEX_NONDIR2); + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) err = PTR_ERR(old_page); - goto out; + goto out_unlock_old; } if (S_ISDIR(old_inode->i_mode)) { @@ -1108,6 +1116,9 @@ static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, f2fs_unlock_op(sbi); + if (S_ISDIR(old_inode->i_mode)) + inode_unlock(old_inode); + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); @@ -1122,6 +1133,9 @@ out_dir: f2fs_put_page(old_dir_page, 0); out_old: f2fs_put_page(old_page, 0); +out_unlock_old: + if (S_ISDIR(old_inode->i_mode)) + inode_unlock(old_inode); out: iput(whiteout); return err; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 99454d46a939..906fb67a99da 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -93,17 +93,15 @@ static inline void copy_node_info(struct node_info *dst, static inline void set_nat_flag(struct nat_entry *ne, unsigned int type, bool set) { - unsigned char mask = 0x01 << type; if (set) - ne->ni.flag |= mask; + ne->ni.flag |= BIT(type); else - ne->ni.flag &= ~mask; + ne->ni.flag &= ~BIT(type); } static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) { - unsigned char mask = 0x01 << type; - return ne->ni.flag & mask; + return ne->ni.flag & BIT(type); } static inline void nat_reset_flag(struct nat_entry *ne) @@ -225,7 +223,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, struct f2fs_nm_info *nm_i = NM_I(sbi); block_addr -= nm_i->nat_blkaddr; - block_addr ^= 1 << sbi->log_blocks_per_seg; + block_addr ^= BIT(sbi->log_blocks_per_seg); return block_addr + nm_i->nat_blkaddr; } @@ -395,7 +393,7 @@ static inline nid_t get_nid(struct page *p, int off, bool i) static inline int is_node(struct page *page, int type) { struct f2fs_node *rn = F2FS_NODE(page); - return le32_to_cpu(rn->footer.flag) & (1 << type); + return le32_to_cpu(rn->footer.flag) & BIT(type); } #define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) @@ -408,9 +406,9 @@ static inline void set_cold_node(struct page *page, bool is_dir) unsigned int flag = le32_to_cpu(rn->footer.flag); if (is_dir) - flag &= ~(0x1 << COLD_BIT_SHIFT); + flag &= ~BIT(COLD_BIT_SHIFT); else - flag |= (0x1 << COLD_BIT_SHIFT); + flag |= BIT(COLD_BIT_SHIFT); rn->footer.flag = cpu_to_le32(flag); } @@ -419,9 +417,9 @@ static inline void set_mark(struct page *page, int mark, int type) struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) - flag |= (0x1 << type); + flag |= BIT(type); else - flag &= ~(0x1 << type); + flag &= ~BIT(type); rn->footer.flag = cpu_to_le32(flag); #ifdef CONFIG_F2FS_CHECK_FS diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 77fd453949b1..15d944a3ada5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -825,19 +825,9 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; bool fix_curseg_write_pointer = false; -#ifdef CONFIG_QUOTA - int quota_enabled; -#endif - if (s_flags & SB_RDONLY) { + if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) f2fs_info(sbi, "recover fsync data on readonly fs"); - sbi->sb->s_flags &= ~SB_RDONLY; - } - -#ifdef CONFIG_QUOTA - /* Turn on quotas so that they are updated correctly */ - quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); -#endif INIT_LIST_HEAD(&inode_list); INIT_LIST_HEAD(&tmp_inode_list); @@ -909,11 +899,6 @@ skip: } } -#ifdef CONFIG_QUOTA - /* Turn quotas off */ - if (quota_enabled) - f2fs_quota_off_umount(sbi->sb); -#endif sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return ret ? ret : err; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c3d415db5855..c1506b4dcdbe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -217,7 +217,7 @@ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, retry: set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE_RA); + err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) { if (err == -ENOMEM) { f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); @@ -246,10 +246,16 @@ retry: } else { blkcnt_t count = 1; + err = inc_valid_block_count(sbi, inode, &count); + if (err) { + f2fs_put_dnode(&dn); + return err; + } + *old_addr = dn.data_blkaddr; f2fs_truncate_data_blocks_range(&dn, 1); dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count); - inc_valid_block_count(sbi, inode, &count); + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr, ni.version, true, false); } @@ -406,27 +412,28 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. */ - if (has_not_enough_free_secs(sbi, 0, 0)) { - if (test_opt(sbi, GC_MERGE) && sbi->gc_thread && - sbi->gc_thread->f2fs_gc_task) { - DEFINE_WAIT(wait); + if (has_enough_free_secs(sbi, 0, 0)) + return; - prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait, - TASK_UNINTERRUPTIBLE); - wake_up(&sbi->gc_thread->gc_wait_queue_head); - io_schedule(); - finish_wait(&sbi->gc_thread->fggc_wq, &wait); - } else { - struct f2fs_gc_control gc_control = { - .victim_segno = NULL_SEGNO, - .init_gc_type = BG_GC, - .no_bg_gc = true, - .should_migrate_blocks = false, - .err_gc_skipped = false, - .nr_free_secs = 1 }; - f2fs_down_write(&sbi->gc_lock); - f2fs_gc(sbi, &gc_control); - } + if (test_opt(sbi, GC_MERGE) && sbi->gc_thread && + sbi->gc_thread->f2fs_gc_task) { + DEFINE_WAIT(wait); + + prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait, + TASK_UNINTERRUPTIBLE); + wake_up(&sbi->gc_thread->gc_wait_queue_head); + io_schedule(); + finish_wait(&sbi->gc_thread->fggc_wq, &wait); + } else { + struct f2fs_gc_control gc_control = { + .victim_segno = NULL_SEGNO, + .init_gc_type = BG_GC, + .no_bg_gc = true, + .should_migrate_blocks = false, + .err_gc_skipped = false, + .nr_free_secs = 1 }; + f2fs_down_write(&sbi->gc_lock); + f2fs_gc(sbi, &gc_control); } } @@ -1845,6 +1852,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, sector_t sector, nr_sects; block_t lblkstart = blkstart; int devi = 0; + u64 remainder = 0; if (f2fs_is_multi_device(sbi)) { devi = f2fs_target_device_index(sbi, blkstart); @@ -1860,9 +1868,9 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, if (f2fs_blkz_is_seq(sbi, devi, blkstart)) { sector = SECTOR_FROM_BLOCK(blkstart); nr_sects = SECTOR_FROM_BLOCK(blklen); + div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder); - if (sector & (bdev_zone_sectors(bdev) - 1) || - nr_sects != bdev_zone_sectors(bdev)) { + if (remainder || nr_sects != bdev_zone_sectors(bdev)) { f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)", devi, sbi->s_ndevs ? FDEV(devi).path : "", blkstart, blklen); @@ -2067,9 +2075,11 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, if (force && start >= cpc->trim_start && (end - 1) <= cpc->trim_end) - continue; + continue; - if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) { + /* Should cover 2MB zoned device for zone-based reset */ + if (!f2fs_sb_has_blkzoned(sbi) && + (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); continue; @@ -2872,7 +2882,6 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, int alloc_mode, unsigned long long age) { struct curseg_info *curseg = CURSEG_I(sbi, type); - const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; unsigned segno = NULL_SEGNO; unsigned short seg_type = curseg->seg_type; int i, cnt; @@ -2881,7 +2890,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, sanity_check_seg_type(sbi, seg_type); /* f2fs_need_SSR() already forces to do this */ - if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { curseg->next_segno = segno; return 1; } @@ -2908,7 +2917,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, for (; cnt-- > 0; reversed ? i-- : i++) { if (i == seg_type) continue; - if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { curseg->next_segno = segno; return 1; } @@ -4940,9 +4949,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) "New zone for curseg[%d] is not yet discarded. " "Reset the zone: curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); - err = __f2fs_issue_discard_zone(sbi, zbd->bdev, - zone_sector >> log_sectors_per_block, - zone.len >> log_sectors_per_block); + err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block, + zone.len >> log_sectors_per_block); if (err) { f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", zbd->path, err); @@ -5001,48 +5009,6 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) return 0; } -static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx, - unsigned int dev_idx) -{ - if (!bdev_is_zoned(FDEV(dev_idx).bdev)) - return true; - return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq); -} - -/* Return the zone index in the given device */ -static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno, - int dev_idx) -{ - block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); - - return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >> - sbi->log_blocks_per_blkz; -} - -/* - * Return the usable segments in a section based on the zone's - * corresponding zone capacity. Zone is equal to a section. - */ -static inline unsigned int f2fs_usable_zone_segs_in_sec( - struct f2fs_sb_info *sbi, unsigned int segno) -{ - unsigned int dev_idx, zone_idx; - - dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno)); - zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx); - - /* Conventional zone's capacity is always equal to zone size */ - if (is_conv_zone(sbi, zone_idx, dev_idx)) - return sbi->segs_per_sec; - - if (!sbi->unusable_blocks_per_sec) - return sbi->segs_per_sec; - - /* Get the segment count beyond zone capacity block */ - return sbi->segs_per_sec - (sbi->unusable_blocks_per_sec >> - sbi->log_blocks_per_seg); -} - /* * Return the number of usable blocks in a segment. The number of blocks * returned is always equal to the number of blocks in a segment for @@ -5055,23 +5021,13 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( struct f2fs_sb_info *sbi, unsigned int segno) { block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr; - unsigned int zone_idx, dev_idx, secno; - - secno = GET_SEC_FROM_SEG(sbi, segno); - seg_start = START_BLOCK(sbi, segno); - dev_idx = f2fs_target_device_index(sbi, seg_start); - zone_idx = get_zone_idx(sbi, secno, dev_idx); - - /* - * Conventional zone's capacity is always equal to zone size, - * so, blocks per segment is unchanged. - */ - if (is_conv_zone(sbi, zone_idx, dev_idx)) - return sbi->blocks_per_seg; + unsigned int secno; if (!sbi->unusable_blocks_per_sec) return sbi->blocks_per_seg; + secno = GET_SEC_FROM_SEG(sbi, segno); + seg_start = START_BLOCK(sbi, segno); sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno)); sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi); @@ -5105,11 +5061,6 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi return 0; } -static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - return 0; -} #endif unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno) @@ -5124,7 +5075,7 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, unsigned int segno) { if (f2fs_sb_has_blkzoned(sbi)) - return f2fs_usable_zone_segs_in_sec(sbi, segno); + return CAP_SEGS_PER_SEC(sbi); return sbi->segs_per_sec; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index efdb7fc3b797..2ca8fb5d0dc4 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -104,6 +104,9 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define CAP_BLKS_PER_SEC(sbi) \ ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \ (sbi)->unusable_blocks_per_sec) +#define CAP_SEGS_PER_SEC(sbi) \ + ((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\ + (sbi)->log_blocks_per_seg)) #define GET_SEC_FROM_SEG(sbi, segno) \ (((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec) #define GET_SEG_FROM_SEC(sbi, secno) \ @@ -286,7 +289,6 @@ enum dirty_type { }; struct dirty_seglist_info { - const struct victim_selection *v_ops; /* victim selction operation */ unsigned long *dirty_segmap[NR_DIRTY_TYPE]; unsigned long *dirty_secmap; struct mutex seglist_lock; /* lock for segment bitmaps */ @@ -297,12 +299,6 @@ struct dirty_seglist_info { bool enable_pin_section; /* enable pinning section */ }; -/* victim selection function for cleaning and SSR */ -struct victim_selection { - int (*get_victim)(struct f2fs_sb_info *, unsigned int *, - int, int, char, unsigned long long); -}; - /* for active log information */ struct curseg_info { struct mutex curseg_mutex; /* lock for consistency */ @@ -599,8 +595,12 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, return true; } -static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, - int freed, int needed) +/* + * calculate needed sections for dirty node/dentry + * and call has_curseg_enough_space + */ +static inline void __get_secs_required(struct f2fs_sb_info *sbi, + unsigned int *lower_p, unsigned int *upper_p, bool *curseg_p) { unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) + get_pages(sbi, F2FS_DIRTY_DENTS) + @@ -610,27 +610,50 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi); unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); - unsigned int free, need_lower, need_upper; + + if (lower_p) + *lower_p = node_secs + dent_secs; + if (upper_p) + *upper_p = node_secs + dent_secs + + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0); + if (curseg_p) + *curseg_p = has_curseg_enough_space(sbi, + node_blocks, dent_blocks); +} + +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, + int freed, int needed) +{ + unsigned int free_secs, lower_secs, upper_secs; + bool curseg_space; if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; - free = free_sections(sbi) + freed; - need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed; - need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0); + __get_secs_required(sbi, &lower_secs, &upper_secs, &curseg_space); - if (free > need_upper) + free_secs = free_sections(sbi) + freed; + lower_secs += needed + reserved_sections(sbi); + upper_secs += needed + reserved_sections(sbi); + + if (free_secs > upper_secs) return false; - else if (free <= need_lower) + else if (free_secs <= lower_secs) return true; - return !has_curseg_enough_space(sbi, node_blocks, dent_blocks); + return !curseg_space; +} + +static inline bool has_enough_free_secs(struct f2fs_sb_info *sbi, + int freed, int needed) +{ + return !has_not_enough_free_secs(sbi, freed, needed); } static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) { if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; - if (likely(!has_not_enough_free_secs(sbi, 0, 0))) + if (likely(has_enough_free_secs(sbi, 0, 0))) return true; return false; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5c1c3a84501f..9f15b03037db 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -880,8 +880,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) if (args->from && match_int(args, &arg)) return -EINVAL; if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) { - f2fs_warn(sbi, "Not support %d, larger than %d", - 1 << arg, BIO_MAX_VECS); + f2fs_warn(sbi, "Not support %ld, larger than %d", + BIT(arg), BIO_MAX_VECS); return -EINVAL; } F2FS_OPTION(sbi).write_io_size_bits = arg; @@ -1179,9 +1179,17 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) kfree(name); break; case Opt_compress_chksum: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } F2FS_OPTION(sbi).compress_chksum = true; break; case Opt_compress_mode: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } name = match_strdup(&args[0]); if (!name) return -ENOMEM; @@ -1196,6 +1204,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) kfree(name); break; case Opt_compress_cache: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_info(sbi, "Image doesn't support compression"); + break; + } set_opt(sbi, COMPRESS_CACHE); break; #else @@ -1310,7 +1322,7 @@ default_check: #endif if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) { - f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", + f2fs_err(sbi, "Should set mode=lfs with %luKB-sized IO", F2FS_IO_SIZE_KB(sbi)); return -EINVAL; } @@ -2060,10 +2072,12 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); - F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; - F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; - F2FS_OPTION(sbi).compress_ext_cnt = 0; - F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; + if (f2fs_sb_has_compression(sbi)) { + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; + F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; + F2FS_OPTION(sbi).compress_ext_cnt = 0; + F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; + } F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL; @@ -2274,7 +2288,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (f2fs_readonly(sb) && (*flags & SB_RDONLY)) goto skip; - if (f2fs_sb_has_readonly(sbi) && !(*flags & SB_RDONLY)) { + if (f2fs_dev_is_readonly(sbi) && !(*flags & SB_RDONLY)) { err = -EROFS; goto restore_opts; } @@ -2487,6 +2501,54 @@ restore_opts: } #ifdef CONFIG_QUOTA +static bool f2fs_need_recovery(struct f2fs_sb_info *sbi) +{ + /* need to recovery orphan */ + if (is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) + return true; + /* need to recovery data */ + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) + return false; + if (test_opt(sbi, NORECOVERY)) + return false; + return !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG); +} + +static bool f2fs_recover_quota_begin(struct f2fs_sb_info *sbi) +{ + bool readonly = f2fs_readonly(sbi->sb); + + if (!f2fs_need_recovery(sbi)) + return false; + + /* it doesn't need to check f2fs_sb_has_readonly() */ + if (f2fs_hw_is_readonly(sbi)) + return false; + + if (readonly) { + sbi->sb->s_flags &= ~SB_RDONLY; + set_sbi_flag(sbi, SBI_IS_WRITABLE); + } + + /* + * Turn on quotas which were not enabled for read-only mounts if + * filesystem has quota feature, so that they are updated correctly. + */ + return f2fs_enable_quota_files(sbi, readonly); +} + +static void f2fs_recover_quota_end(struct f2fs_sb_info *sbi, + bool quota_enabled) +{ + if (quota_enabled) + f2fs_quota_off_umount(sbi->sb); + + if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) { + clear_sbi_flag(sbi, SBI_IS_WRITABLE); + sbi->sb->s_flags |= SB_RDONLY; + } +} + /* Read data from quotafile */ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) @@ -3260,7 +3322,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, raw_super->segment_count = cpu_to_le32((main_end_blkaddr - segment0_blkaddr) >> log_blocks_per_seg); - if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) { + if (f2fs_readonly(sb) || f2fs_hw_is_readonly(sbi)) { set_sbi_flag(sbi, SBI_NEED_SB_WRITE); res = "internally"; } else { @@ -3348,7 +3410,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, total_sections = le32_to_cpu(raw_super->section_count); /* blocks_per_seg should be 512, given the above check */ - blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg); + blocks_per_seg = BIT(le32_to_cpu(raw_super->log_blocks_per_seg)); if (segment_count > F2FS_MAX_SEGMENT || segment_count < F2FS_MIN_SEGMENTS) { @@ -3617,9 +3679,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->log_sectors_per_block = le32_to_cpu(raw_super->log_sectors_per_block); sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize); - sbi->blocksize = 1 << sbi->log_blocksize; + sbi->blocksize = BIT(sbi->log_blocksize); sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); - sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg; + sbi->blocks_per_seg = BIT(sbi->log_blocks_per_seg); sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); sbi->total_sections = le32_to_cpu(raw_super->section_count); @@ -3744,12 +3806,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) SECTOR_TO_BLOCK(zone_sectors)) return -EINVAL; sbi->blocks_per_blkz = SECTOR_TO_BLOCK(zone_sectors); - if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz != - __ilog2_u32(sbi->blocks_per_blkz)) - return -EINVAL; - sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz); - FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >> - sbi->log_blocks_per_blkz; + FDEV(devi).nr_blkz = div_u64(SECTOR_TO_BLOCK(nr_sectors), + sbi->blocks_per_blkz); if (nr_sectors & (zone_sectors - 1)) FDEV(devi).nr_blkz++; @@ -3836,7 +3894,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) int err; if ((recover && f2fs_readonly(sbi->sb)) || - bdev_read_only(sbi->sb->s_bdev)) { + f2fs_hw_is_readonly(sbi)) { set_sbi_flag(sbi, SBI_NEED_SB_WRITE); return -EROFS; } @@ -3875,7 +3933,7 @@ void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason) f2fs_down_write(&sbi->sb_lock); - if (raw_super->s_stop_reason[reason] < ((1 << BITS_PER_BYTE) - 1)) + if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0)) raw_super->s_stop_reason[reason]++; err = f2fs_commit_super(sbi, false); @@ -4025,7 +4083,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) FDEV(i).start_blk, FDEV(i).end_blk); } f2fs_info(sbi, - "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); + "IO Block Size: %8ld KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -4102,6 +4160,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) int recovery, i, valid_super_block; struct curseg_info *seg_i; int retry_cnt = 1; +#ifdef CONFIG_QUOTA + bool quota_enabled = false; +#endif try_onemore: err = -EINVAL; @@ -4395,6 +4456,8 @@ try_onemore: if (err) f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } + + quota_enabled = f2fs_recover_quota_begin(sbi); #endif /* if there are any orphan inodes, free them */ err = f2fs_recover_orphan_inodes(sbi); @@ -4452,6 +4515,10 @@ try_onemore: } } +#ifdef CONFIG_QUOTA + f2fs_recover_quota_end(sbi, quota_enabled); +#endif + /* * If the f2fs is not readonly and fsync data recovery succeeds, * check zoned block devices' write pointer consistency. diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 0b19163c90d4..8ea05340bad9 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -312,19 +312,14 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) { struct ckpt_req_control *cprc = &sbi->cprc_info; - int len = 0; int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio); int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio); - if (class == IOPRIO_CLASS_RT) - len += scnprintf(buf + len, PAGE_SIZE - len, "rt,"); - else if (class == IOPRIO_CLASS_BE) - len += scnprintf(buf + len, PAGE_SIZE - len, "be,"); - else + if (class != IOPRIO_CLASS_RT && class != IOPRIO_CLASS_BE) return -EINVAL; - len += scnprintf(buf + len, PAGE_SIZE - len, "%d\n", data); - return len; + return sysfs_emit(buf, "%s,%d\n", + class == IOPRIO_CLASS_RT ? "rt" : "be", data); } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -452,7 +447,7 @@ out: if (ret < 0) return ret; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX)) + if (a->struct_type == FAULT_INFO_TYPE && t >= BIT(FAULT_MAX)) return -EINVAL; if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX) return -EINVAL; @@ -575,9 +570,9 @@ out: if (!strcmp(a->attr.name, "iostat_period_ms")) { if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS) return -EINVAL; - spin_lock(&sbi->iostat_lock); + spin_lock_irq(&sbi->iostat_lock); sbi->iostat_period_ms = (unsigned int)t; - spin_unlock(&sbi->iostat_lock); + spin_unlock_irq(&sbi->iostat_lock); return count; } #endif @@ -598,6 +593,20 @@ out: sbi->compr_new_inode = 0; return count; } + + if (!strcmp(a->attr.name, "compress_percent")) { + if (t == 0 || t > 100) + return -EINVAL; + *ui = t; + return count; + } + + if (!strcmp(a->attr.name, "compress_watermark")) { + if (t == 0 || t > 100) + return -EINVAL; + *ui = t; + return count; + } #endif if (!strcmp(a->attr.name, "atgc_candidate_ratio")) { @@ -950,6 +959,8 @@ F2FS_FEATURE_RO_ATTR(compression); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compress_percent, compress_percent); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compress_watermark, compress_watermark); #endif F2FS_FEATURE_RO_ATTR(pin_file); @@ -1057,6 +1068,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(compr_written_block), ATTR_LIST(compr_saved_block), ATTR_LIST(compr_new_inode), + ATTR_LIST(compress_percent), + ATTR_LIST(compress_watermark), #endif /* For ATGC */ ATTR_LIST(atgc_candidate_ratio), @@ -1449,25 +1462,14 @@ put_sb_kobj: void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) { - if (sbi->s_proc) { -#ifdef CONFIG_F2FS_IOSTAT - remove_proc_entry("iostat_info", sbi->s_proc); -#endif - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry("segment_bits", sbi->s_proc); - remove_proc_entry("victim_bits", sbi->s_proc); - remove_proc_entry("discard_plist_info", sbi->s_proc); - remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); - } + if (sbi->s_proc) + remove_proc_subtree(sbi->sb->s_id, f2fs_proc_root); - kobject_del(&sbi->s_stat_kobj); kobject_put(&sbi->s_stat_kobj); wait_for_completion(&sbi->s_stat_kobj_unregister); - kobject_del(&sbi->s_feature_list_kobj); kobject_put(&sbi->s_feature_list_kobj); wait_for_completion(&sbi->s_feature_list_kobj_unregister); - kobject_del(&sbi->s_kobj); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1701f25117ea..1d6402529d10 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -40,9 +40,8 @@ #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ -#define F2FS_IO_SIZE_BYTES(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits + 2) /* KB */ #define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ #define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) #define F2FS_IO_ALIGNED(sbi) (F2FS_IO_SIZE(sbi) > 1) @@ -340,7 +339,7 @@ enum { OFFSET_BIT_SHIFT }; -#define OFFSET_BIT_MASK (0x07) /* (0x01 << OFFSET_BIT_SHIFT) - 1 */ +#define OFFSET_BIT_MASK GENMASK(OFFSET_BIT_SHIFT - 1, 0) struct node_footer { __le32 nid; /* node id */ @@ -545,7 +544,7 @@ typedef __le32 f2fs_hash_t; #define MAX_DIR_HASH_DEPTH 63 /* MAX buckets in one level of dir */ -#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) +#define MAX_DIR_BUCKETS BIT((MAX_DIR_HASH_DEPTH / 2) - 1) /* * space utilization of regular dentry and inline dentry (w/o extra reservation) @@ -585,21 +584,6 @@ struct f2fs_dentry_block { __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; } __packed; -/* file types used in inode_info->flags */ -enum { - F2FS_FT_UNKNOWN, - F2FS_FT_REG_FILE, - F2FS_FT_DIR, - F2FS_FT_CHRDEV, - F2FS_FT_BLKDEV, - F2FS_FT_FIFO, - F2FS_FT_SOCK, - F2FS_FT_SYMLINK, - F2FS_FT_MAX -}; - -#define S_SHIFT 12 - #define F2FS_DEF_PROJID 0 /* default project ID */ #endif /* _LINUX_F2FS_FS_H */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 1322d34a5dfc..99cbc5949e3c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -512,7 +512,7 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(nid_t, nid[3]) + __array(nid_t, nid, 3) __field(int, depth) __field(int, err) ),