diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 67b3ed8e8c2f..3dfee94e0618 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -370,3 +370,10 @@ Date: April 2020 Contact: "Daeho Jeong" Description: Give a way to change iostat_period time. 3secs by default. The new iostat trace gives stats gap given the period. +What: /sys/fs/f2fs//max_io_bytes +Date: December 2020 +Contact: "Jaegeuk Kim" +Description: This gives a control to limit the bio size in f2fs. + Default is zero, which will follow underlying block layer limit, + whereas, if it has a certain bytes value, f2fs won't submit a + bio larger than that size. diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index b8ee761c9922..dae15c96e659 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -260,6 +260,14 @@ compress_extension=%s Support adding specified extension, so that f2fs can enab For other files, we can still enable compression via ioctl. Note that, there is one reserved special extension '*', it can be set to enable compression for all files. +compress_chksum Support verifying chksum of raw data in compressed cluster. +compress_mode=%s Control file compression mode. This supports "fs" and "user" + modes. In "fs" mode (default), f2fs does automatic compression + on the compression enabled files. In "user" mode, f2fs disables + the automaic compression and gives the user discretion of + choosing the target file and the timing. The user can do manual + compression/decompression on the compression enabled files using + ioctls. inlinecrypt When possible, encrypt/decrypt the contents of encrypted files using the blk-crypto framework rather than filesystem-layer encryption. This allows the use of @@ -810,6 +818,34 @@ Compress metadata layout:: | data length | data chksum | reserved | compressed data | +-------------+-------------+----------+----------------------------+ +Compression mode +-------------------------- + +f2fs supports "fs" and "user" compression modes with "compression_mode" mount option. +With this option, f2fs provides a choice to select the way how to compress the +compression enabled files (refer to "Compression implementation" section for how to +enable compression on a regular inode). + +1) compress_mode=fs +This is the default option. f2fs does automatic compression in the writeback of the +compression enabled files. + +2) compress_mode=user +This disables the automaic compression and gives the user discretion of choosing the +target file and the timing. The user can do manual compression/decompression on the +compression enabled files using F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE +ioctls like the below. + +To decompress a file, + +fd = open(filename, O_WRONLY, 0); +ret = ioctl(fd, F2FS_IOC_DECOMPRESS_FILE); + +To compress a file, + +fd = open(filename, O_WRONLY, 0); +ret = ioctl(fd, F2FS_IOC_COMPRESS_FILE); + NVMe Zoned Namespace devices ---------------------------- diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 306413589827..1e5e9b1136ee 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -384,7 +384,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, struct page *dpage) { struct posix_acl *default_acl = NULL, *acl = NULL; - int error = 0; + int error; error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage); if (error) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 023462e80e58..617d0f6b0836 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -37,7 +37,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); - struct page *page = NULL; + struct page *page; repeat: page = f2fs_grab_cache_page(mapping, index, false); if (!page) { @@ -348,13 +348,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* if locked failed, cp will flush dirty pages instead */ - if (!mutex_trylock(&sbi->cp_mutex)) + if (!down_write_trylock(&sbi->cp_global_sem)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); - mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->cp_global_sem); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -1385,6 +1385,27 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi, f2fs_submit_merged_write(sbi, META_FLUSH); } +static inline u64 get_sectors_written(struct block_device *bdev) +{ + return bdev->bd_part ? + (u64)part_stat_read(bdev->bd_part, sectors[STAT_WRITE]) : 0; +} + +u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi) +{ + if (f2fs_is_multi_device(sbi)) { + u64 sectors = 0; + int i; + + for (i = 0; i < sbi->s_ndevs; i++) + sectors += get_sectors_written(FDEV(i).bdev); + + return sectors; + } + + return get_sectors_written(sbi->sb->s_bdev); +} + static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1395,7 +1416,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u32 crc32 = 0; int i; int cp_payload_blks = __cp_payload(sbi); - struct super_block *sb = sbi->sb; struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); u64 kbytes_written; int err; @@ -1490,9 +1510,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Record write statistics in the hot node summary */ kbytes_written = sbi->kbytes_written; - if (sb->s_bdev->bd_part) - kbytes_written += BD_PART_WRITTEN(sbi); - + kbytes_written += (f2fs_get_sectors_written(sbi) - + sbi->sectors_written_start) >> 1; seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); if (__remain_node_summaries(cpc->reason)) { @@ -1572,7 +1591,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_warn(sbi, "Start checkpoint disabled!"); } if (cpc->reason != CP_RESIZE) - mutex_lock(&sbi->cp_mutex); + down_write(&sbi->cp_global_sem); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || @@ -1600,7 +1619,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) goto out; } - if (NM_I(sbi)->dirty_nat_cnt == 0 && + if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 && SIT_I(sbi)->dirty_sentries == 0 && prefree_segments(sbi) == 0) { f2fs_flush_sit_entries(sbi, cpc); @@ -1647,7 +1666,7 @@ stop: trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: if (cpc->reason != CP_RESIZE) - mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->cp_global_sem); return err; } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c5fee4d7ea72..4bcbacfe3325 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -602,6 +602,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) f2fs_cops[fi->i_compress_algorithm]; unsigned int max_len, new_nr_cpages; struct page **new_cpages; + u32 chksum = 0; int i, ret; trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx, @@ -655,6 +656,11 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->cbuf->clen = cpu_to_le32(cc->clen); + if (fi->i_compress_flag & 1 << COMPRESS_CHKSUM) + chksum = f2fs_crc32(F2FS_I_SB(cc->inode), + cc->cbuf->cdata, cc->clen); + cc->cbuf->chksum = cpu_to_le32(chksum); + for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) cc->cbuf->reserved[i] = cpu_to_le32(0); @@ -790,6 +796,22 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) ret = cops->decompress_pages(dic); + if (!ret && (fi->i_compress_flag & 1 << COMPRESS_CHKSUM)) { + u32 provided = le32_to_cpu(dic->cbuf->chksum); + u32 calculated = f2fs_crc32(sbi, dic->cbuf->cdata, dic->clen); + + if (provided != calculated) { + if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) { + set_inode_flag(dic->inode, FI_COMPRESS_CORRUPT); + printk_ratelimited( + "%sF2FS-fs (%s): checksum invalid, nid = %lu, %x vs %x", + KERN_INFO, sbi->sb->s_id, dic->inode->i_ino, + provided, calculated); + } + set_sbi_flag(sbi, SBI_NEED_FSCK); + } + } + out_vunmap_cbuf: vm_unmap_ram(dic->cbuf, dic->nr_cpages); out_vunmap_rbuf: @@ -919,7 +941,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) static bool cluster_may_compress(struct compress_ctx *cc) { - if (!f2fs_compressed_file(cc->inode)) + if (!f2fs_need_compress_data(cc->inode)) return false; if (f2fs_is_atomic_file(cc->inode)) return false; diff --git a/fs/f2fs/compress.h b/fs/f2fs/compress.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 051a95cdf3a2..d1e83f119338 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -741,6 +741,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, block_t last_blkaddr, block_t cur_blkaddr) { + if (unlikely(sbi->max_io_bytes && + bio->bi_iter.bi_size >= sbi->max_io_bytes)) + return false; if (last_blkaddr + 1 != cur_blkaddr) return false; return __same_bdev(sbi, cur_blkaddr, bio); @@ -1756,6 +1759,16 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) return true; } +static inline u64 bytes_to_blks(struct inode *inode, u64 bytes) +{ + return (bytes >> inode->i_blkbits); +} + +static inline u64 blks_to_bytes(struct inode *inode, u64 blks) +{ + return (blks << inode->i_blkbits); +} + static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, pgoff_t *next_pgofs, int seg_type, bool may_write) @@ -1764,7 +1777,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, int err; map.m_lblk = iblock; - map.m_len = bh->b_size >> inode->i_blkbits; + map.m_len = bytes_to_blks(inode, bh->b_size); map.m_next_pgofs = next_pgofs; map.m_next_extent = NULL; map.m_seg_type = seg_type; @@ -1774,20 +1787,11 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (!err) { map_bh(bh, inode->i_sb, map.m_pblk); bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; - bh->b_size = (u64)map.m_len << inode->i_blkbits; + bh->b_size = blks_to_bytes(inode, map.m_len); } return err; } -static int get_data_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create, int flag, - pgoff_t *next_pgofs) -{ - return __get_data_block(inode, iblock, bh_result, create, - flag, next_pgofs, - NO_CHECK_TYPE, create); -} - static int get_data_block_dio_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -1806,24 +1810,6 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, false); } -static int get_data_block_bmap(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_BMAP, NULL, - NO_CHECK_TYPE, create); -} - -static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) -{ - return (offset >> inode->i_blkbits); -} - -static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) -{ - return (blk << inode->i_blkbits); -} - static int f2fs_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) { @@ -1849,7 +1835,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, return err; } - phys = (__u64)blk_to_logical(inode, ni.blk_addr); + phys = blks_to_bytes(inode, ni.blk_addr); offset = offsetof(struct f2fs_inode, i_addr) + sizeof(__le32) * (DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)); @@ -1881,7 +1867,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, return err; } - phys = (__u64)blk_to_logical(inode, ni.blk_addr); + phys = blks_to_bytes(inode, ni.blk_addr); len = inode->i_sb->s_blocksize; f2fs_put_page(page, 1); @@ -1919,7 +1905,7 @@ static loff_t max_inode_blocks(struct inode *inode) int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { - struct buffer_head map_bh; + struct f2fs_map_blocks map; sector_t start_blk, last_blk; pgoff_t next_pgofs; u64 logical = 0, phys = 0, size = 0; @@ -1951,29 +1937,31 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - if (logical_to_blk(inode, len) == 0) - len = blk_to_logical(inode, 1); + if (bytes_to_blks(inode, len) == 0) + len = blks_to_bytes(inode, 1); - start_blk = logical_to_blk(inode, start); - last_blk = logical_to_blk(inode, start + len - 1); + start_blk = bytes_to_blks(inode, start); + last_blk = bytes_to_blks(inode, start + len - 1); next: - memset(&map_bh, 0, sizeof(struct buffer_head)); - map_bh.b_size = len; + memset(&map, 0, sizeof(map)); + map.m_lblk = start_blk; + map.m_len = bytes_to_blks(inode, len); + map.m_next_pgofs = &next_pgofs; + map.m_seg_type = NO_CHECK_TYPE; if (compr_cluster) - map_bh.b_size = blk_to_logical(inode, cluster_size - 1); + map.m_len = cluster_size - 1; - ret = get_data_block(inode, start_blk, &map_bh, 0, - F2FS_GET_BLOCK_FIEMAP, &next_pgofs); + ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); if (ret) goto out; /* HOLE */ - if (!buffer_mapped(&map_bh)) { + if (!(map.m_flags & F2FS_MAP_FLAGS)) { start_blk = next_pgofs; - if (blk_to_logical(inode, start_blk) < blk_to_logical(inode, + if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode, max_inode_blocks(inode))) goto prep_next; @@ -1999,9 +1987,9 @@ next: compr_cluster = false; - logical = blk_to_logical(inode, start_blk - 1); - phys = blk_to_logical(inode, map_bh.b_blocknr); - size = blk_to_logical(inode, cluster_size); + logical = blks_to_bytes(inode, start_blk - 1); + phys = blks_to_bytes(inode, map.m_pblk); + size = blks_to_bytes(inode, cluster_size); flags |= FIEMAP_EXTENT_ENCODED; @@ -2013,20 +2001,20 @@ next: goto prep_next; } - if (map_bh.b_blocknr == COMPRESS_ADDR) { + if (map.m_pblk == COMPRESS_ADDR) { compr_cluster = true; start_blk++; goto prep_next; } - logical = blk_to_logical(inode, start_blk); - phys = blk_to_logical(inode, map_bh.b_blocknr); - size = map_bh.b_size; + logical = blks_to_bytes(inode, start_blk); + phys = blks_to_bytes(inode, map.m_pblk); + size = blks_to_bytes(inode, map.m_len); flags = 0; - if (buffer_unwritten(&map_bh)) + if (map.m_flags & F2FS_MAP_UNWRITTEN) flags = FIEMAP_EXTENT_UNWRITTEN; - start_blk += logical_to_blk(inode, size); + start_blk += bytes_to_blks(inode, size); prep_next: cond_resched(); @@ -2059,8 +2047,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, bool is_readahead) { struct bio *bio = *bio_ret; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; + const unsigned blocksize = blks_to_bytes(inode, 1); sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; @@ -2069,8 +2056,8 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, block_in_file = (sector_t)page_index(page); last_block = block_in_file + nr_pages; - last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >> - blkbits; + last_block_in_file = bytes_to_blks(inode, + f2fs_readpage_limit(inode) + blocksize - 1); if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -2183,8 +2170,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, struct bio *bio = *bio_ret; unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; sector_t last_block_in_file; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; + const unsigned blocksize = blks_to_bytes(inode, 1); struct decompress_io_ctx *dic = NULL; struct bio_post_read_ctx *ctx; bool for_verity = false; @@ -2193,8 +2179,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); - last_block_in_file = (f2fs_readpage_limit(inode) + - blocksize - 1) >> blkbits; + last_block_in_file = bytes_to_blks(inode, + f2fs_readpage_limit(inode) + blocksize - 1); /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { @@ -3205,7 +3191,7 @@ static inline bool __should_serialize_io(struct inode *inode, if (IS_NOQUOTA(inode)) return false; - if (f2fs_compressed_file(inode)) + if (f2fs_need_compress_data(inode)) return true; if (wbc->sync_mode != WB_SYNC_ALL) return true; @@ -3881,9 +3867,6 @@ static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block) static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; - struct buffer_head tmp = { - .b_size = i_blocksize(inode), - }; sector_t blknr = 0; if (f2fs_has_inline_data(inode)) @@ -3900,8 +3883,16 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) if (f2fs_compressed_file(inode)) { blknr = f2fs_bmap_compress(inode, block); } else { - if (!get_data_block_bmap(inode, block, &tmp, 0)) - blknr = tmp.b_blocknr; + struct f2fs_map_blocks map; + + memset(&map, 0, sizeof(map)); + map.m_lblk = block; + map.m_len = 1; + map.m_next_pgofs = NULL; + map.m_seg_type = NO_CHECK_TYPE; + + if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP)) + blknr = map.m_pblk; } out: trace_f2fs_bmap(inode, block, blknr); @@ -3977,7 +3968,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, sector_t highest_pblock = 0; int nr_extents = 0; unsigned long nr_pblocks; - unsigned long len; + u64 len; int ret; /* @@ -3985,29 +3976,31 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, * to be very smart. */ cur_lblock = 0; - last_lblock = logical_to_blk(inode, i_size_read(inode)); + last_lblock = bytes_to_blks(inode, i_size_read(inode)); len = i_size_read(inode); while (cur_lblock <= last_lblock && cur_lblock < sis->max) { - struct buffer_head map_bh; + struct f2fs_map_blocks map; pgoff_t next_pgofs; cond_resched(); - memset(&map_bh, 0, sizeof(struct buffer_head)); - map_bh.b_size = len - cur_lblock; + memset(&map, 0, sizeof(map)); + map.m_lblk = cur_lblock; + map.m_len = bytes_to_blks(inode, len) - cur_lblock; + map.m_next_pgofs = &next_pgofs; + map.m_seg_type = NO_CHECK_TYPE; - ret = get_data_block(inode, cur_lblock, &map_bh, 0, - F2FS_GET_BLOCK_FIEMAP, &next_pgofs); + ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); if (ret) goto err_out; /* hole */ - if (!buffer_mapped(&map_bh)) + if (!(map.m_flags & F2FS_MAP_FLAGS)) goto err_out; - pblock = map_bh.b_blocknr; - nr_pblocks = logical_to_blk(inode, map_bh.b_size); + pblock = map.m_pblk; + nr_pblocks = map.m_len; if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; @@ -4050,7 +4043,6 @@ static int check_swap_activate(struct swap_info_struct *sis, struct inode *inode = mapping->host; unsigned blocks_per_page; unsigned long page_no; - unsigned blkbits; sector_t probe_block; sector_t last_block; sector_t lowest_block = -1; @@ -4061,8 +4053,7 @@ static int check_swap_activate(struct swap_info_struct *sis, if (PAGE_SIZE == F2FS_BLKSIZE) return check_swap_activate_fast(sis, swap_file, span); - blkbits = inode->i_blkbits; - blocks_per_page = PAGE_SIZE >> blkbits; + blocks_per_page = bytes_to_blks(inode, PAGE_SIZE); /* * Map all the blocks into the extent list. This code doesn't try @@ -4070,7 +4061,7 @@ static int check_swap_activate(struct swap_info_struct *sis, */ probe_block = 0; page_no = 0; - last_block = i_size_read(inode) >> blkbits; + last_block = bytes_to_blks(inode, i_size_read(inode)); while ((probe_block + blocks_per_page) <= last_block && page_no < sis->max) { unsigned block_in_page; @@ -4110,7 +4101,7 @@ static int check_swap_activate(struct swap_info_struct *sis, } } - first_block >>= (PAGE_SHIFT - blkbits); + first_block >>= (PAGE_SHIFT - inode->i_blkbits); if (page_no) { /* exclude the header page */ if (first_block < lowest_block) lowest_block = first_block; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a8357fd4f5fa..197c914119da 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -145,8 +145,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->node_pages = NODE_MAPPING(sbi)->nrpages; if (sbi->meta_inode) si->meta_pages = META_MAPPING(sbi)->nrpages; - si->nats = NM_I(sbi)->nat_cnt; - si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; + si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT]; + si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT]; si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID]; @@ -278,9 +278,10 @@ get_cache: si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] + NM_I(sbi)->nid_cnt[PREALLOC_NID]) * sizeof(struct free_nid); - si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); - si->cache_mem += NM_I(sbi)->dirty_nat_cnt * - sizeof(struct nat_entry_set); + si->cache_mem += NM_I(sbi)->nat_cnt[TOTAL_NAT] * + sizeof(struct nat_entry); + si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] * + sizeof(struct nat_entry_set); si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); for (i = 0; i < MAX_INO_ENTRY; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2675490158a9..5130423a13e7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -33,10 +33,8 @@ #else #define f2fs_bug_on(sbi, condition) \ do { \ - if (unlikely(condition)) { \ - WARN_ON(1); \ + if (WARN_ON(condition)) \ set_sbi_flag(sbi, SBI_NEED_FSCK); \ - } \ } while (0) #endif @@ -147,8 +145,10 @@ struct f2fs_mount_info { /* For compression */ unsigned char compress_algorithm; /* algorithm type */ - unsigned compress_log_size; /* cluster log size */ + unsigned char compress_log_size; /* cluster log size */ + bool compress_chksum; /* compressed data chksum */ unsigned char compress_ext_cnt; /* extension count */ + int compress_mode; /* compression mode */ unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ }; @@ -402,85 +402,6 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, return size <= MAX_SIT_JENTRIES(journal); } -/* - * f2fs-specific ioctl commands - */ -#define F2FS_IOCTL_MAGIC 0xf5 -#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) -#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) -#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) -#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) -#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) -#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) -#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) -#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ - struct f2fs_defragment) -#define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ - struct f2fs_move_range) -#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ - struct f2fs_flush_device) -#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ - struct f2fs_gc_range) -#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) -#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) -#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) -#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) -#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) -#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) -#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS \ - _IOR(F2FS_IOCTL_MAGIC, 18, __u64) -#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \ - _IOR(F2FS_IOCTL_MAGIC, 19, __u64) -#define F2FS_IOC_SEC_TRIM_FILE _IOW(F2FS_IOCTL_MAGIC, 20, \ - struct f2fs_sectrim_range) - -/* - * should be same as XFS_IOC_GOINGDOWN. - * Flags for going down operation used by FS_IOC_GOINGDOWN - */ -#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ -#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ -#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ -#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ -#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ -#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ - -/* - * Flags used by F2FS_IOC_SEC_TRIM_FILE - */ -#define F2FS_TRIM_FILE_DISCARD 0x1 /* send discard command */ -#define F2FS_TRIM_FILE_ZEROOUT 0x2 /* zero out */ -#define F2FS_TRIM_FILE_MASK 0x3 - -struct f2fs_gc_range { - u32 sync; - u64 start; - u64 len; -}; - -struct f2fs_defragment { - u64 start; - u64 len; -}; - -struct f2fs_move_range { - u32 dst_fd; /* destination fd */ - u64 pos_in; /* start position in src_fd */ - u64 pos_out; /* start position in dst_fd */ - u64 len; /* size to move */ -}; - -struct f2fs_flush_device { - u32 dev_num; /* device number to flush */ - u32 segments; /* # of segments to flush */ -}; - -struct f2fs_sectrim_range { - u64 start; - u64 len; - u64 flags; -}; - /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 static inline int get_extra_isize(struct inode *inode); @@ -755,7 +676,9 @@ enum { FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */ FI_MMAP_FILE, /* indicate file was mmapped */ + FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */ FI_MAX, /* max flag, never be used */ }; @@ -812,6 +735,7 @@ struct f2fs_inode_info { atomic_t i_compr_blocks; /* # of compressed blocks */ unsigned char i_compress_algorithm; /* algorithm type */ unsigned char i_log_cluster_size; /* log of cluster size */ + unsigned short i_compress_flag; /* compress flag */ unsigned int i_cluster_size; /* cluster size */ }; @@ -896,6 +820,13 @@ enum nid_state { MAX_NID_STATE, }; +enum nat_state { + TOTAL_NAT, + DIRTY_NAT, + RECLAIMABLE_NAT, + MAX_NAT_STATE, +}; + struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ @@ -911,8 +842,7 @@ struct f2fs_nm_info { struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ spinlock_t nat_list_lock; /* protect clean nat entry list */ - unsigned int nat_cnt; /* the # of cached nat entries */ - unsigned int dirty_nat_cnt; /* total num of nat entries in set */ + unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ unsigned int nat_blocks; /* # of nat blocks */ /* free node ids management */ @@ -1322,6 +1252,18 @@ enum fsync_mode { FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ }; +enum { + COMPR_MODE_FS, /* + * automatically compress compression + * enabled files + */ + COMPR_MODE_USER, /* + * automatical compression is disabled. + * user can control the file compression + * using ioctls + */ +}; + /* * this value is set in page as a private data which indicate that * the page is atomically written, and it is in inmem_pages list. @@ -1351,9 +1293,15 @@ enum compress_algorithm_type { COMPRESS_MAX, }; -#define COMPRESS_DATA_RESERVED_SIZE 5 +enum compress_flag { + COMPRESS_CHKSUM, + COMPRESS_MAX_FLAG, +}; + +#define COMPRESS_DATA_RESERVED_SIZE 4 struct compress_data { __le32 clen; /* compressed data size */ + __le32 chksum; /* compressed data chksum */ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ u8 cdata[]; /* compressed data */ }; @@ -1449,7 +1397,7 @@ struct f2fs_sb_info { int cur_cp_pack; /* remain current cp pack */ spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ - struct mutex cp_mutex; /* checkpoint procedure lock */ + struct rw_semaphore cp_global_sem; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ struct rw_semaphore node_change; /* locking node change */ @@ -1499,6 +1447,7 @@ struct f2fs_sb_info { loff_t max_file_blocks; /* max block index of file */ int dir_level; /* directory level */ int readdir_ra; /* readahead inode in readdir */ + u64 max_io_bytes; /* max io bytes to merge IOs */ block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ @@ -1674,13 +1623,6 @@ static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) return sbi->s_ndevs > 1; } -/* For write statistics. Suppose sector size is 512 bytes, - * and the return value is in kbytes. s is of struct f2fs_sb_info. - */ -#define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) - \ - (s)->sectors_written_start) >> 1) - static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { unsigned long now = jiffies; @@ -2480,24 +2422,31 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) { - if (sbi->gc_mode == GC_URGENT_HIGH) - return true; - if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || get_pages(sbi, F2FS_WB_CP_DATA) || get_pages(sbi, F2FS_DIO_READ) || get_pages(sbi, F2FS_DIO_WRITE)) - return false; + return true; if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info && atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) - return false; + return true; if (SM_I(sbi) && SM_I(sbi)->fcc_info && atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) + return true; + return false; +} + +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + if (sbi->gc_mode == GC_URGENT_HIGH) + return true; + + if (is_inflight_io(sbi, type)) return false; if (sbi->gc_mode == GC_URGENT_LOW && @@ -2832,6 +2781,22 @@ static inline int f2fs_compressed_file(struct inode *inode) is_inode_flag_set(inode, FI_COMPRESSED_FILE); } +static inline bool f2fs_need_compress_data(struct inode *inode) +{ + int compress_mode = F2FS_OPTION(F2FS_I_SB(inode)).compress_mode; + + if (!f2fs_compressed_file(inode)) + return false; + + if (compress_mode == COMPR_MODE_FS) + return true; + else if (compress_mode == COMPR_MODE_USER && + is_inode_flag_set(inode, FI_ENABLE_COMPRESS)) + return true; + + return false; +} + static inline unsigned int addrs_per_inode(struct inode *inode) { unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - @@ -3448,6 +3413,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page); void f2fs_remove_dirty_inode(struct inode *inode); int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type); +u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi); int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); int __init f2fs_create_checkpoint_caches(void); @@ -3963,6 +3929,9 @@ static inline void set_compress_context(struct inode *inode) F2FS_OPTION(sbi).compress_algorithm; F2FS_I(inode)->i_log_cluster_size = F2FS_OPTION(sbi).compress_log_size; + F2FS_I(inode)->i_compress_flag = + F2FS_OPTION(sbi).compress_chksum ? + 1 << COMPRESS_CHKSUM : 0; F2FS_I(inode)->i_cluster_size = 1 << F2FS_I(inode)->i_log_cluster_size; F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fe39e591e5b4..16ea10f2bcf5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -31,6 +31,7 @@ #include "gc.h" #include "trace.h" #include +#include static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { @@ -2479,26 +2480,19 @@ out: return ret; } -static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) +static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) { - struct inode *inode = file_inode(filp); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_gc_range range; + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); u64 end; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - - if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, - sizeof(range))) - return -EFAULT; - if (f2fs_readonly(sbi->sb)) return -EROFS; - end = range.start + range.len; - if (end < range.start || range.start < MAIN_BLKADDR(sbi) || + end = range->start + range->len; + if (end < range->start || range->start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) return -EINVAL; @@ -2507,7 +2501,7 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) return ret; do_more: - if (!range.sync) { + if (!range->sync) { if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; @@ -2516,20 +2510,30 @@ do_more: down_write(&sbi->gc_lock); } - ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); + ret = f2fs_gc(sbi, range->sync, true, GET_SEGNO(sbi, range->start)); if (ret) { if (ret == -EBUSY) ret = -EAGAIN; goto out; } - range.start += BLKS_PER_SEC(sbi); - if (range.start <= end) + range->start += BLKS_PER_SEC(sbi); + if (range->start <= end) goto do_more; out: mnt_drop_write_file(filp); return ret; } +static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) +{ + struct f2fs_gc_range range; + + if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, + sizeof(range))) + return -EFAULT; + return __f2fs_ioc_gc_range(filp, &range); +} + static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -2866,9 +2870,9 @@ out: return ret; } -static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) +static int __f2fs_ioc_move_range(struct file *filp, + struct f2fs_move_range *range) { - struct f2fs_move_range range; struct fd dst; int err; @@ -2876,11 +2880,7 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) !(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, - sizeof(range))) - return -EFAULT; - - dst = fdget(range.dst_fd); + dst = fdget(range->dst_fd); if (!dst.file) return -EBADF; @@ -2893,21 +2893,25 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) if (err) goto err_out; - err = f2fs_move_file_range(filp, range.pos_in, dst.file, - range.pos_out, range.len); + err = f2fs_move_file_range(filp, range->pos_in, dst.file, + range->pos_out, range->len); mnt_drop_write_file(filp); - if (err) - goto err_out; - - if (copy_to_user((struct f2fs_move_range __user *)arg, - &range, sizeof(range))) - err = -EFAULT; err_out: fdput(dst); return err; } +static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) +{ + struct f2fs_move_range range; + + if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, + sizeof(range))) + return -EFAULT; + return __f2fs_ioc_move_range(filp, &range); +} + static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -3944,13 +3948,265 @@ err: return ret; } -long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) { - if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) - return -EIO; - if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) - return -ENOSPC; + struct inode *inode = file_inode(filp); + struct f2fs_comp_option option; + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + + inode_lock_shared(inode); + + if (!f2fs_compressed_file(inode)) { + inode_unlock_shared(inode); + return -ENODATA; + } + + option.algorithm = F2FS_I(inode)->i_compress_algorithm; + option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size; + + inode_unlock_shared(inode); + + if (copy_to_user((struct f2fs_comp_option __user *)arg, &option, + sizeof(option))) + return -EFAULT; + + return 0; +} + +static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_comp_option option; + int ret = 0; + + if (!f2fs_sb_has_compression(sbi)) + return -EOPNOTSUPP; + + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg, + sizeof(option))) + return -EFAULT; + + if (!f2fs_compressed_file(inode) || + option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || + option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || + option.algorithm >= COMPRESS_MAX) + return -EINVAL; + + file_start_write(filp); + inode_lock(inode); + + if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { + ret = -EBUSY; + goto out; + } + + if (inode->i_size != 0) { + ret = -EFBIG; + goto out; + } + + F2FS_I(inode)->i_compress_algorithm = option.algorithm; + F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size; + F2FS_I(inode)->i_cluster_size = 1 << option.log_cluster_size; + f2fs_mark_inode_dirty_sync(inode, true); + + if (!f2fs_is_compress_backend_ready(inode)) + f2fs_warn(sbi, "compression algorithm is successfully set, " + "but current kernel doesn't support this algorithm."); +out: + inode_unlock(inode); + file_end_write(filp); + + return ret; +} + +static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) +{ + DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx); + struct address_space *mapping = inode->i_mapping; + struct page *page; + pgoff_t redirty_idx = page_idx; + int i, page_len = 0, ret = 0; + + page_cache_ra_unbounded(&ractl, len, 0); + + for (i = 0; i < len; i++, page_idx++) { + page = read_cache_page(mapping, page_idx, NULL, NULL); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + break; + } + page_len++; + } + + for (i = 0; i < page_len; i++, redirty_idx++) { + page = find_lock_page(mapping, redirty_idx); + if (!page) + ret = -ENOENT; + set_page_dirty(page); + f2fs_put_page(page, 1); + f2fs_put_page(page, 0); + } + + return ret; +} + +static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t page_idx = 0, last_idx; + unsigned int blk_per_seg = sbi->blocks_per_seg; + int cluster_size = F2FS_I(inode)->i_cluster_size; + int count, ret; + + if (!f2fs_sb_has_compression(sbi) || + F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) + return -EOPNOTSUPP; + + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + f2fs_balance_fs(F2FS_I_SB(inode), true); + + file_start_write(filp); + inode_lock(inode); + + if (!f2fs_is_compress_backend_ready(inode)) { + ret = -EOPNOTSUPP; + goto out; + } + + if (f2fs_is_mmap_file(inode)) { + ret = -EBUSY; + goto out; + } + + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); + if (ret) + goto out; + + if (!atomic_read(&fi->i_compr_blocks)) + goto out; + + last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + count = last_idx - page_idx; + while (count) { + int len = min(cluster_size, count); + + ret = redirty_blocks(inode, page_idx, len); + if (ret < 0) + break; + + if (get_dirty_pages(inode) >= blk_per_seg) + filemap_fdatawrite(inode->i_mapping); + + count -= len; + page_idx += len; + } + + if (!ret) + ret = filemap_write_and_wait_range(inode->i_mapping, 0, + LLONG_MAX); + + if (ret) + f2fs_warn(sbi, "%s: The file might be partially decompressed " + "(errno=%d). Please delete the file.\n", + __func__, ret); +out: + inode_unlock(inode); + file_end_write(filp); + + return ret; +} + +static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + pgoff_t page_idx = 0, last_idx; + unsigned int blk_per_seg = sbi->blocks_per_seg; + int cluster_size = F2FS_I(inode)->i_cluster_size; + int count, ret; + + if (!f2fs_sb_has_compression(sbi) || + F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) + return -EOPNOTSUPP; + + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + f2fs_balance_fs(F2FS_I_SB(inode), true); + + file_start_write(filp); + inode_lock(inode); + + if (!f2fs_is_compress_backend_ready(inode)) { + ret = -EOPNOTSUPP; + goto out; + } + + if (f2fs_is_mmap_file(inode)) { + ret = -EBUSY; + goto out; + } + + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); + if (ret) + goto out; + + set_inode_flag(inode, FI_ENABLE_COMPRESS); + + last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + count = last_idx - page_idx; + while (count) { + int len = min(cluster_size, count); + + ret = redirty_blocks(inode, page_idx, len); + if (ret < 0) + break; + + if (get_dirty_pages(inode) >= blk_per_seg) + filemap_fdatawrite(inode->i_mapping); + + count -= len; + page_idx += len; + } + + if (!ret) + ret = filemap_write_and_wait_range(inode->i_mapping, 0, + LLONG_MAX); + + clear_inode_flag(inode, FI_ENABLE_COMPRESS); + + if (ret) + f2fs_warn(sbi, "%s: The file might be partially compressed " + "(errno=%d). Please delete the file.\n", + __func__, ret); +out: + inode_unlock(inode); + file_end_write(filp); + + return ret; +} + +static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ switch (cmd) { case FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); @@ -4032,11 +4288,29 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_reserve_compress_blocks(filp, arg); case F2FS_IOC_SEC_TRIM_FILE: return f2fs_sec_trim_file(filp, arg); + case F2FS_IOC_GET_COMPRESS_OPTION: + return f2fs_ioc_get_compress_option(filp, arg); + case F2FS_IOC_SET_COMPRESS_OPTION: + return f2fs_ioc_set_compress_option(filp, arg); + case F2FS_IOC_DECOMPRESS_FILE: + return f2fs_ioc_decompress_file(filp, arg); + case F2FS_IOC_COMPRESS_FILE: + return f2fs_ioc_compress_file(filp, arg); default: return -ENOTTY; } } +long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) + return -EIO; + if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) + return -ENOSPC; + + return __f2fs_ioctl(filp, cmd, arg); +} + static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; @@ -4153,8 +4427,63 @@ out: } #ifdef CONFIG_COMPAT +struct compat_f2fs_gc_range { + u32 sync; + compat_u64 start; + compat_u64 len; +}; +#define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\ + struct compat_f2fs_gc_range) + +static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg) +{ + struct compat_f2fs_gc_range __user *urange; + struct f2fs_gc_range range; + int err; + + urange = compat_ptr(arg); + err = get_user(range.sync, &urange->sync); + err |= get_user(range.start, &urange->start); + err |= get_user(range.len, &urange->len); + if (err) + return -EFAULT; + + return __f2fs_ioc_gc_range(file, &range); +} + +struct compat_f2fs_move_range { + u32 dst_fd; + compat_u64 pos_in; + compat_u64 pos_out; + compat_u64 len; +}; +#define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ + struct compat_f2fs_move_range) + +static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg) +{ + struct compat_f2fs_move_range __user *urange; + struct f2fs_move_range range; + int err; + + urange = compat_ptr(arg); + err = get_user(range.dst_fd, &urange->dst_fd); + err |= get_user(range.pos_in, &urange->pos_in); + err |= get_user(range.pos_out, &urange->pos_out); + err |= get_user(range.len, &urange->len); + if (err) + return -EFAULT; + + return __f2fs_ioc_move_range(file, &range); +} + long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) + return -EIO; + if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file)))) + return -ENOSPC; + switch (cmd) { case FS_IOC32_GETFLAGS: cmd = FS_IOC_GETFLAGS; @@ -4165,6 +4494,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC32_GETVERSION: cmd = FS_IOC_GETVERSION; break; + case F2FS_IOC32_GARBAGE_COLLECT_RANGE: + return f2fs_compat_ioc_gc_range(file, arg); + case F2FS_IOC32_MOVE_RANGE: + return f2fs_compat_ioc_move_range(file, arg); case F2FS_IOC_START_ATOMIC_WRITE: case F2FS_IOC_COMMIT_ATOMIC_WRITE: case F2FS_IOC_START_VOLATILE_WRITE: @@ -4182,10 +4515,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GET_ENCRYPTION_KEY_STATUS: case FS_IOC_GET_ENCRYPTION_NONCE: case F2FS_IOC_GARBAGE_COLLECT: - case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: - case F2FS_IOC_MOVE_RANGE: case F2FS_IOC_FLUSH_DEVICE: case F2FS_IOC_GET_FEATURES: case FS_IOC_FSGETXATTR: @@ -4202,11 +4533,15 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: case F2FS_IOC_SEC_TRIM_FILE: + case F2FS_IOC_GET_COMPRESS_OPTION: + case F2FS_IOC_SET_COMPRESS_OPTION: + case F2FS_IOC_DECOMPRESS_FILE: + case F2FS_IOC_COMPRESS_FILE: break; default: return -ENOIOCTLCMD; } - return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); + return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 05641a1e36cc..3ef84e6ded41 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1986,7 +1986,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) freeze_super(sbi->sb); down_write(&sbi->gc_lock); - mutex_lock(&sbi->cp_mutex); + down_write(&sbi->cp_global_sem); spin_lock(&sbi->stat_lock); if (shrunk_blocks + valid_user_blocks(sbi) + @@ -2031,7 +2031,7 @@ recover_out: spin_unlock(&sbi->stat_lock); } out_err: - mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->cp_global_sem); up_write(&sbi->gc_lock); thaw_super(sbi->sb); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index b93d0ccbf983..0a8f64feefe4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -206,7 +206,8 @@ int f2fs_convert_inline_inode(struct inode *inode) struct page *ipage, *page; int err = 0; - if (!f2fs_has_inline_data(inode)) + if (!f2fs_has_inline_data(inode) || + f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) return 0; page = f2fs_grab_cache_page(inode->i_mapping, 0, false); @@ -284,7 +285,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) * [prev.] [next] of inline_data flag * o o -> recover inline_data * o x -> remove inline_data, and then recover data blocks - * x o -> remove inline_data, and then recover inline_data + * x o -> remove data blocks, and then recover inline_data * x x -> recover data blocks */ if (IS_INODE(npage)) @@ -316,6 +317,7 @@ process_inline: if (IS_ERR(ipage)) return PTR_ERR(ipage); f2fs_truncate_inline_inode(inode, ipage, 0); + stat_dec_inline_inode(inode); clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { @@ -324,6 +326,7 @@ process_inline: ret = f2fs_truncate_blocks(inode, 0, false); if (ret) return ret; + stat_inc_inline_inode(inode); goto process_inline; } return 0; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 657db2fb6739..349d9cb933ee 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -456,6 +456,7 @@ static int do_read_inode(struct inode *inode) le64_to_cpu(ri->i_compr_blocks)); fi->i_compress_algorithm = ri->i_compress_algorithm; fi->i_log_cluster_size = ri->i_log_cluster_size; + fi->i_compress_flag = le16_to_cpu(ri->i_compress_flag); fi->i_cluster_size = 1 << fi->i_log_cluster_size; set_inode_flag(inode, FI_COMPRESSED_FILE); } @@ -634,6 +635,8 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) &F2FS_I(inode)->i_compr_blocks)); ri->i_compress_algorithm = F2FS_I(inode)->i_compress_algorithm; + ri->i_compress_flag = + cpu_to_le16(F2FS_I(inode)->i_compress_flag); ri->i_log_cluster_size = F2FS_I(inode)->i_log_cluster_size; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d5d8ce077f29..3a24423ac65f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -62,8 +62,8 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> - PAGE_SHIFT; + mem_size = (nm_i->nat_cnt[TOTAL_NAT] * + sizeof(struct nat_entry)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); if (excess_cached_nats(sbi)) res = false; @@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page) static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { - return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid)); + return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid)); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) @@ -177,7 +177,8 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, list_add_tail(&ne->list, &nm_i->nat_entries); spin_unlock(&nm_i->nat_list_lock); - nm_i->nat_cnt++; + nm_i->nat_cnt[TOTAL_NAT]++; + nm_i->nat_cnt[RECLAIMABLE_NAT]++; return ne; } @@ -207,7 +208,8 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) { radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); - nm_i->nat_cnt--; + nm_i->nat_cnt[TOTAL_NAT]--; + nm_i->nat_cnt[RECLAIMABLE_NAT]--; __free_nat_entry(e); } @@ -253,7 +255,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) goto refresh_list; - nm_i->dirty_nat_cnt++; + nm_i->nat_cnt[DIRTY_NAT]++; + nm_i->nat_cnt[RECLAIMABLE_NAT]--; set_nat_flag(ne, IS_DIRTY, true); refresh_list: spin_lock(&nm_i->nat_list_lock); @@ -273,7 +276,8 @@ static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, set_nat_flag(ne, IS_DIRTY, false); set->entry_cnt--; - nm_i->dirty_nat_cnt--; + nm_i->nat_cnt[DIRTY_NAT]--; + nm_i->nat_cnt[RECLAIMABLE_NAT]++; } static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, @@ -2590,9 +2594,15 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct page *page) ri = F2FS_INODE(page); if (ri->i_inline & F2FS_INLINE_XATTR) { - set_inode_flag(inode, FI_INLINE_XATTR); + if (!f2fs_has_inline_xattr(inode)) { + set_inode_flag(inode, FI_INLINE_XATTR); + stat_inc_inline_xattr(inode); + } } else { - clear_inode_flag(inode, FI_INLINE_XATTR); + if (f2fs_has_inline_xattr(inode)) { + stat_dec_inline_xattr(inode); + clear_inode_flag(inode, FI_INLINE_XATTR); + } goto update_inode; } @@ -2944,14 +2954,17 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) LIST_HEAD(sets); int err = 0; - /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ + /* + * during unmount, let's flush nat_bits before checking + * nat_cnt[DIRTY_NAT]. + */ if (enabled_nat_bits(sbi, cpc)) { down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); up_write(&nm_i->nat_tree_lock); } - if (!nm_i->dirty_nat_cnt) + if (!nm_i->nat_cnt[DIRTY_NAT]) return 0; down_write(&nm_i->nat_tree_lock); @@ -2962,7 +2975,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * into nat entry set. */ if (enabled_nat_bits(sbi, cpc) || - !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) + !__has_cursum_space(journal, + nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL)) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, @@ -3086,7 +3100,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi) F2FS_RESERVED_NODE_NUM; nm_i->nid_cnt[FREE_NID] = 0; nm_i->nid_cnt[PREALLOC_NID] = 0; - nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; @@ -3220,7 +3233,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, natvec[idx]); } } - f2fs_bug_on(sbi, nm_i->nat_cnt); + f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]); /* destroy nat set cache */ nid = 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 69e5859e993c..f84541b57acb 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -126,13 +126,13 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi) { - return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid * + return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid * NM_I(sbi)->dirty_nats_ratio / 100; } static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) { - return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; + return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD; } static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 0947d36af1a8..da75d5d52f0a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -799,7 +799,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) INIT_LIST_HEAD(&dir_list); /* prevent checkpoint */ - mutex_lock(&sbi->cp_mutex); + down_write(&sbi->cp_global_sem); /* step #1: find fsynced inode numbers */ err = find_fsync_dnodes(sbi, &inode_list, check_only); @@ -850,7 +850,7 @@ skip: if (!err) clear_sbi_flag(sbi, SBI_POR_DOING); - mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->cp_global_sem); /* let's drop all the directory inodes for clean checkpoint */ destroy_fsync_dnodes(&dir_list, err); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f2a4265318f5..deca74cb17df 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -529,31 +529,38 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) else f2fs_build_free_nids(sbi, false, false); - if (!is_idle(sbi, REQ_TIME) && - (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) + if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) || + excess_prefree_segs(sbi)) + goto do_sync; + + /* there is background inflight IO or foreground operation recently */ + if (is_inflight_io(sbi, REQ_TIME) || + (!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem))) return; + /* exceed periodical checkpoint timeout threshold */ + if (f2fs_time_over(sbi, CP_TIME)) + goto do_sync; + /* checkpoint is the only way to shrink partial cached entries */ - if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) || - !f2fs_available_free_memory(sbi, INO_ENTRIES) || - excess_prefree_segs(sbi) || - excess_dirty_nats(sbi) || - excess_dirty_nodes(sbi) || - f2fs_time_over(sbi, CP_TIME)) { - if (test_opt(sbi, DATA_FLUSH) && from_bg) { - struct blk_plug plug; + if (f2fs_available_free_memory(sbi, NAT_ENTRIES) || + f2fs_available_free_memory(sbi, INO_ENTRIES)) + return; - mutex_lock(&sbi->flush_lock); +do_sync: + if (test_opt(sbi, DATA_FLUSH) && from_bg) { + struct blk_plug plug; - blk_start_plug(&plug); - f2fs_sync_dirty_inodes(sbi, FILE_INODE); - blk_finish_plug(&plug); + mutex_lock(&sbi->flush_lock); - mutex_unlock(&sbi->flush_lock); - } - f2fs_sync_fs(sbi->sb, true); - stat_inc_bg_cp_count(sbi->stat_info); + blk_start_plug(&plug); + f2fs_sync_dirty_inodes(sbi, FILE_INODE); + blk_finish_plug(&plug); + + mutex_unlock(&sbi->flush_lock); } + f2fs_sync_fs(sbi->sb, true); + stat_inc_bg_cp_count(sbi->stat_info); } static int __submit_flush_wait(struct f2fs_sb_info *sbi, @@ -3254,7 +3261,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) else return CURSEG_COLD_DATA; } - if (file_is_cold(inode) || f2fs_compressed_file(inode)) + if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index d66de5999a26..dd3c3c7a90ec 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -18,9 +18,7 @@ static unsigned int shrinker_run_no; static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) { - long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt; - - return count > 0 ? count : 0; + return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT]; } static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 42293b7ceaf2..e8b94bb14bd8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -146,6 +146,8 @@ enum { Opt_compress_algorithm, Opt_compress_log_size, Opt_compress_extension, + Opt_compress_chksum, + Opt_compress_mode, Opt_atgc, Opt_err, }; @@ -214,6 +216,8 @@ static match_table_t f2fs_tokens = { {Opt_compress_algorithm, "compress_algorithm=%s"}, {Opt_compress_log_size, "compress_log_size=%u"}, {Opt_compress_extension, "compress_extension=%s"}, + {Opt_compress_chksum, "compress_chksum"}, + {Opt_compress_mode, "compress_mode=%s"}, {Opt_atgc, "atgc"}, {Opt_err, NULL}, }; @@ -934,10 +938,29 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) F2FS_OPTION(sbi).compress_ext_cnt++; kfree(name); break; + case Opt_compress_chksum: + F2FS_OPTION(sbi).compress_chksum = true; + break; + case Opt_compress_mode: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (!strcmp(name, "fs")) { + F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; + } else if (!strcmp(name, "user")) { + F2FS_OPTION(sbi).compress_mode = COMPR_MODE_USER; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; #else case Opt_compress_algorithm: case Opt_compress_log_size: case Opt_compress_extension: + case Opt_compress_chksum: + case Opt_compress_mode: f2fs_info(sbi, "compression options not supported"); break; #endif @@ -1523,6 +1546,14 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, seq_printf(seq, ",compress_extension=%s", F2FS_OPTION(sbi).extensions[i]); } + + if (F2FS_OPTION(sbi).compress_chksum) + seq_puts(seq, ",compress_chksum"); + + if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_FS) + seq_printf(seq, ",compress_mode=%s", "fs"); + else if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER) + seq_printf(seq, ",compress_mode=%s", "user"); } static int f2fs_show_options(struct seq_file *seq, struct dentry *root) @@ -1672,6 +1703,7 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; F2FS_OPTION(sbi).compress_ext_cnt = 0; + F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; sbi->sb->s_flags &= ~SB_INLINECRYPT; @@ -1904,7 +1936,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (*flags & SB_RDONLY || F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) { - writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -2744,7 +2775,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, block_t total_sections, blocks_per_seg; struct f2fs_super_block *raw_super = (struct f2fs_super_block *) (bh->b_data + F2FS_SUPER_OFFSET); - unsigned int blocksize; size_t crc_offset = 0; __u32 crc = 0; @@ -2770,18 +2800,11 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } } - /* Currently, support only 4KB page cache size */ - if (F2FS_BLKSIZE != PAGE_SIZE) { - f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB", - PAGE_SIZE); - return -EFSCORRUPTED; - } - /* Currently, support only 4KB block size */ - blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); - if (blocksize != F2FS_BLKSIZE) { - f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", - blocksize); + if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) { + f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u", + le32_to_cpu(raw_super->log_blocksize), + F2FS_BLKSIZE_BITS); return -EFSCORRUPTED; } @@ -3071,9 +3094,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->total_node_count = (le32_to_cpu(raw_super->segment_count_nat) / 2) * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; - sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); - sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); - sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); + F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino); + F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino); + F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; sbi->next_victim_seg[BG_GC] = NULL_SEGNO; sbi->next_victim_seg[FG_GC] = NULL_SEGNO; @@ -3552,7 +3575,7 @@ try_onemore: sbi->valid_super_block = valid_super_block; init_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); - mutex_init(&sbi->cp_mutex); + init_rwsem(&sbi->cp_global_sem); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); @@ -3693,10 +3716,7 @@ try_onemore: } /* For write statistics */ - if (sb->s_bdev->bd_part) - sbi->sectors_written_start = - (u64)part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]); + sbi->sectors_written_start = f2fs_get_sectors_written(sbi); /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); @@ -3911,6 +3931,7 @@ free_bio_info: #ifdef CONFIG_UNICODE utf8_unload(sb->s_encoding); + sb->s_encoding = NULL; #endif free_options: #ifdef CONFIG_QUOTA diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index ec77ccfea923..989a649cfa8b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -97,7 +97,8 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, return sprintf(buf, "%llu\n", (unsigned long long)(sbi->kbytes_written + - BD_PART_WRITTEN(sbi))); + ((f2fs_get_sectors_written(sbi) - + sbi->sectors_written_start) >> 1))); } static ssize_t features_show(struct f2fs_attr *a, @@ -566,6 +567,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_io_bytes, max_io_bytes); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -650,6 +652,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(iostat_enable), ATTR_LIST(iostat_period_ms), ATTR_LIST(readdir_ra), + ATTR_LIST(max_io_bytes), ATTR_LIST(gc_pin_file_thresh), ATTR_LIST(extension_list), #ifdef CONFIG_F2FS_FAULT_INJECTION diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a5dbb57a687f..7dc2a06cf19a 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -273,7 +273,7 @@ struct f2fs_inode { __le64 i_compr_blocks; /* # of compressed blocks */ __u8 i_compress_algorithm; /* compress algorithm */ __u8 i_log_cluster_size; /* log of cluster size */ - __le16 i_padding; /* padding */ + __le16 i_compress_flag; /* compress flag */ __le32 i_extra_end[0]; /* for attribute size calculation */ } __packed; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index f8f1e85ff130..56b113e3cd6a 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -6,6 +6,7 @@ #define _TRACE_F2FS_H #include +#include #define show_dev(dev) MAJOR(dev), MINOR(dev) #define show_dev_ino(entry) show_dev(entry->dev), (unsigned long)entry->ino diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h new file mode 100644 index 000000000000..352a822d4370 --- /dev/null +++ b/include/uapi/linux/f2fs.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_F2FS_H +#define _UAPI_LINUX_F2FS_H +#include +#include + +/* + * f2fs-specific ioctl commands + */ +#define F2FS_IOCTL_MAGIC 0xf5 +#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) +#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) +#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) +#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) +#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) +#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) +#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ + struct f2fs_defragment) +#define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ + struct f2fs_move_range) +#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ + struct f2fs_flush_device) +#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ + struct f2fs_gc_range) +#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) +#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) +#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) +#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) +#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) +#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) +#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS \ + _IOR(F2FS_IOCTL_MAGIC, 18, __u64) +#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \ + _IOR(F2FS_IOCTL_MAGIC, 19, __u64) +#define F2FS_IOC_SEC_TRIM_FILE _IOW(F2FS_IOCTL_MAGIC, 20, \ + struct f2fs_sectrim_range) +#define F2FS_IOC_GET_COMPRESS_OPTION _IOR(F2FS_IOCTL_MAGIC, 21, \ + struct f2fs_comp_option) +#define F2FS_IOC_SET_COMPRESS_OPTION _IOW(F2FS_IOCTL_MAGIC, 22, \ + struct f2fs_comp_option) +#define F2FS_IOC_DECOMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 23) +#define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24) + +/* + * should be same as XFS_IOC_GOINGDOWN. + * Flags for going down operation used by FS_IOC_GOINGDOWN + */ +#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ +#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ +#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ +#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ +#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ +#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ + +/* + * Flags used by F2FS_IOC_SEC_TRIM_FILE + */ +#define F2FS_TRIM_FILE_DISCARD 0x1 /* send discard command */ +#define F2FS_TRIM_FILE_ZEROOUT 0x2 /* zero out */ +#define F2FS_TRIM_FILE_MASK 0x3 + +struct f2fs_gc_range { + __u32 sync; + __u64 start; + __u64 len; +}; + +struct f2fs_defragment { + __u64 start; + __u64 len; +}; + +struct f2fs_move_range { + __u32 dst_fd; /* destination fd */ + __u64 pos_in; /* start position in src_fd */ + __u64 pos_out; /* start position in dst_fd */ + __u64 len; /* size to move */ +}; + +struct f2fs_flush_device { + __u32 dev_num; /* device number to flush */ + __u32 segments; /* # of segments to flush */ +}; + +struct f2fs_sectrim_range { + __u64 start; + __u64 len; + __u64 flags; +}; + +struct f2fs_comp_option { + __u8 algorithm; + __u8 log_cluster_size; +}; + +#endif /* _UAPI_LINUX_F2FS_H */