diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 083ac2d63eef..483639fb727b 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -466,6 +466,30 @@ Description: Show status of f2fs superblock in real time. 0x4000 SBI_IS_FREEZING freefs is in process ====== ===================== ================================= +What: /sys/fs/f2fs/<disk>/stat/cp_status +Date: September 2022 +Contact: "Chao Yu" +Description: Show status of f2fs checkpoint in real time. + + =============================== ============================== + cp flag value + CP_UMOUNT_FLAG 0x00000001 + CP_ORPHAN_PRESENT_FLAG 0x00000002 + CP_COMPACT_SUM_FLAG 0x00000004 + CP_ERROR_FLAG 0x00000008 + CP_FSCK_FLAG 0x00000010 + CP_FASTBOOT_FLAG 0x00000020 + CP_CRC_RECOVERY_FLAG 0x00000040 + CP_NAT_BITS_FLAG 0x00000080 + CP_TRIMMED_FLAG 0x00000100 + CP_NOCRC_RECOVERY_FLAG 0x00000200 + CP_LARGE_NAT_BITMAP_FLAG 0x00000400 + CP_QUOTA_NEED_FSCK_FLAG 0x00000800 + CP_DISABLED_FLAG 0x00001000 + CP_DISABLED_QUICK_FLAG 0x00002000 + CP_RESIZEFS_FLAG 0x00004000 + =============================== ============================== + What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio Date: January 2021 Contact: "Daeho Jeong" diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index bfc2a5b74ed3..dac786fb6f80 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -25,21 +25,25 @@ * then this function isn't applicable. This function may sleep, so it must be * called from a workqueue rather than from the bio's bi_end_io callback. * - * This function sets PG_error on any pages that contain any blocks that failed - * to be decrypted. The filesystem must not mark such pages uptodate. + * Return: %true on success; %false on failure. On failure, bio->bi_status is + * also set to an error status. 
*/ -void fscrypt_decrypt_bio(struct bio *bio) +bool fscrypt_decrypt_bio(struct bio *bio) { struct bio_vec *bv; struct bvec_iter_all iter_all; bio_for_each_segment_all(bv, bio, iter_all) { struct page *page = bv->bv_page; - int ret = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len, + int err = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len, bv->bv_offset); - if (ret) - SetPageError(page); + + if (err) { + bio->bi_status = errno_to_blk_status(err); + return false; + } } + return true; } EXPORT_SYMBOL(fscrypt_decrypt_bio); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 3b44b99d0047..6581bfec2161 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -205,7 +205,7 @@ struct fscrypt_symlink_data { struct fscrypt_prepared_key { struct crypto_skcipher *tfm; #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - struct fscrypt_blk_crypto_key *blk_key; + struct blk_crypto_key *blk_key; #endif }; @@ -246,7 +246,7 @@ struct fscrypt_info { * will be NULL if the master key was found in a process-subscribed * keyring rather than in the filesystem-level keyring. */ - struct key *ci_master_key; + struct fscrypt_master_key *ci_master_key; /* * Link in list of inodes that were unlocked with the master key. 
@@ -371,7 +371,8 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, bool is_hw_wrapped, const struct fscrypt_info *ci); -void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key); +void fscrypt_destroy_inline_crypt_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key); int fscrypt_derive_sw_secret(struct super_block *sb, const u8 *wrapped_key, unsigned int wrapped_key_size, @@ -423,7 +424,8 @@ fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, } static inline void -fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) +fscrypt_destroy_inline_crypt_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key) { } @@ -496,6 +498,40 @@ struct fscrypt_master_key_secret { */ struct fscrypt_master_key { + /* + * Back-pointer to the super_block of the filesystem to which this + * master key has been added. Only valid if ->mk_active_refs > 0. + */ + struct super_block *mk_sb; + + /* + * Link in ->mk_sb->s_master_keys->key_hashtable. + * Only valid if ->mk_active_refs > 0. + */ + struct hlist_node mk_node; + + /* Semaphore that protects ->mk_secret and ->mk_users */ + struct rw_semaphore mk_sem; + + /* + * Active and structural reference counts. An active ref guarantees + * that the struct continues to exist, continues to be in the keyring + * ->mk_sb->s_master_keys, and that any embedded subkeys (e.g. + * ->mk_direct_keys) that have been prepared continue to exist. + * A structural ref only guarantees that the struct continues to exist. + * + * There is one active ref associated with ->mk_secret being present, + * and one active ref for each inode in ->mk_decrypted_inodes. + * + * There is one structural ref associated with the active refcount being + * nonzero. Finding a key in the keyring also takes a structural ref, + * which is then held temporarily while the key is operated on. 
+ */ + refcount_t mk_active_refs; + refcount_t mk_struct_refs; + + struct rcu_head mk_rcu_head; + /* * The secret key material. After FS_IOC_REMOVE_ENCRYPTION_KEY is * executed, this is wiped and no new inodes can be unlocked with this @@ -504,7 +540,10 @@ struct fscrypt_master_key { * FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or * FS_IOC_ADD_ENCRYPTION_KEY can add the secret again. * - * Locking: protected by this master key's key->sem. + * While ->mk_secret is present, one ref in ->mk_active_refs is held. + * + * Locking: protected by ->mk_sem. The manipulation of ->mk_active_refs + * associated with this field is protected by ->mk_sem as well. */ struct fscrypt_master_key_secret mk_secret; @@ -525,22 +564,12 @@ struct fscrypt_master_key { * * This is NULL for v1 policy keys; those can only be added by root. * - * Locking: in addition to this keyring's own semaphore, this is - * protected by this master key's key->sem, so we can do atomic - * search+insert. It can also be searched without taking any locks, but - * in that case the returned key may have already been removed. + * Locking: protected by ->mk_sem. (We don't just rely on the keyrings + * subsystem semaphore ->mk_users->sem, as we need support for atomic + * search+insert along with proper synchronization with ->mk_secret.) */ struct key *mk_users; - /* - * Length of ->mk_decrypted_inodes, plus one if mk_secret is present. - * Once this goes to 0, the master key is removed from ->s_master_keys. - * The 'struct fscrypt_master_key' will continue to live as long as the - * 'struct key' whose payload it is, but we won't let this reference - * count rise again. - */ - refcount_t mk_refcount; - /* * List of inodes that were unlocked using this key. This allows the * inodes to be evicted efficiently if the key is removed. 
@@ -566,10 +595,10 @@ static inline bool is_master_key_secret_present(const struct fscrypt_master_key_secret *secret) { /* - * The READ_ONCE() is only necessary for fscrypt_drop_inode() and - * fscrypt_key_describe(). These run in atomic context, so they can't - * take the key semaphore and thus 'secret' can change concurrently - * which would be a data race. But they only need to know whether the + * The READ_ONCE() is only necessary for fscrypt_drop_inode(). + * fscrypt_drop_inode() runs in atomic context, so it can't take the key + * semaphore and thus 'secret' can change concurrently which would be a + * data race. But fscrypt_drop_inode() only needs to know whether the * secret *was* present at the time of check, so READ_ONCE() suffices. */ return READ_ONCE(secret->size) != 0; @@ -598,7 +627,11 @@ static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec) return 0; } -struct key * +void fscrypt_put_master_key(struct fscrypt_master_key *mk); + +void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk); + +struct fscrypt_master_key * fscrypt_find_master_key(struct super_block *sb, const struct fscrypt_key_specifier *mk_spec); @@ -629,7 +662,8 @@ extern struct fscrypt_mode fscrypt_modes[]; int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_info *ci); -void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key); +void fscrypt_destroy_prepared_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key); int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index af74599ae1cf..be5c650e4957 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -5,8 +5,6 @@ * Encryption hooks for higher-level filesystem operations. 
*/ -#include - #include "fscrypt_private.h" /** @@ -142,7 +140,6 @@ int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { struct fscrypt_info *ci; - struct key *key; struct fscrypt_master_key *mk; int err; @@ -158,14 +155,13 @@ int fscrypt_prepare_setflags(struct inode *inode, ci = inode->i_crypt_info; if (ci->ci_policy.version != FSCRYPT_POLICY_V2) return -EINVAL; - key = ci->ci_master_key; - mk = key->payload.data[0]; - down_read(&key->sem); + mk = ci->ci_master_key; + down_read(&mk->mk_sem); if (is_master_key_secret_present(&mk->mk_secret)) err = fscrypt_derive_dirhash_key(ci, mk); else err = -ENOKEY; - up_read(&key->sem); + up_read(&mk->mk_sem); return err; } return 0; diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 919c800c8b76..5a6abb8c1470 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -21,26 +21,22 @@ #include "fscrypt_private.h" -struct fscrypt_blk_crypto_key { - struct blk_crypto_key base; - int num_devs; - struct request_queue *devs[]; -}; - -static int fscrypt_get_num_devices(struct super_block *sb) +static struct block_device **fscrypt_get_devices(struct super_block *sb, + unsigned int *num_devs) { - if (sb->s_cop->get_num_devices) - return sb->s_cop->get_num_devices(sb); - return 1; -} + struct block_device **devs; -static void fscrypt_get_devices(struct super_block *sb, int num_devs, - struct request_queue **devs) -{ - if (num_devs == 1) - devs[0] = bdev_get_queue(sb->s_bdev); - else - sb->s_cop->get_devices(sb, devs); + if (sb->s_cop->get_devices) { + devs = sb->s_cop->get_devices(sb, num_devs); + if (devs) + return devs; + } + devs = kmalloc(sizeof(*devs), GFP_KERNEL); + if (!devs) + return ERR_PTR(-ENOMEM); + devs[0] = sb->s_bdev; + *num_devs = 1; + return devs; } static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci) @@ -74,15 +70,17 @@ static unsigned int fscrypt_get_dun_bytes(const struct fscrypt_info *ci) * helpful for debugging problems 
where the "wrong" implementation is used. */ static void fscrypt_log_blk_crypto_impl(struct fscrypt_mode *mode, - struct request_queue **devs, - int num_devs, + struct block_device **devs, + unsigned int num_devs, const struct blk_crypto_config *cfg) { - int i; + unsigned int i; for (i = 0; i < num_devs; i++) { + struct request_queue *q = bdev_get_queue(devs[i]); + if (!IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) || - __blk_crypto_cfg_supported(devs[i]->crypto_profile, cfg)) { + __blk_crypto_cfg_supported(q->crypto_profile, cfg)) { if (!xchg(&mode->logged_blk_crypto_native, 1)) pr_info("fscrypt: %s using blk-crypto (native)\n", mode->friendly_name); @@ -100,9 +98,9 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci, const struct inode *inode = ci->ci_inode; struct super_block *sb = inode->i_sb; struct blk_crypto_config crypto_cfg; - int num_devs; - struct request_queue **devs; - int i; + struct block_device **devs; + unsigned int num_devs; + unsigned int i; /* The file must need contents encryption, not filenames encryption */ if (!S_ISREG(inode->i_mode)) @@ -130,8 +128,8 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci, return 0; /* - * On all the filesystem's devices, blk-crypto must support the crypto - * configuration that the file would use. + * On all the filesystem's block devices, blk-crypto must support the + * crypto configuration that the file would use. */ crypto_cfg.crypto_mode = ci->ci_mode->blk_crypto_mode; crypto_cfg.data_unit_size = sb->s_blocksize; @@ -139,14 +137,14 @@ int fscrypt_select_encryption_impl(struct fscrypt_info *ci, crypto_cfg.key_type = is_hw_wrapped_key ? 
BLK_CRYPTO_KEY_TYPE_HW_WRAPPED : BLK_CRYPTO_KEY_TYPE_STANDARD; - num_devs = fscrypt_get_num_devices(sb); - devs = kmalloc_array(num_devs, sizeof(*devs), GFP_KERNEL); - if (!devs) - return -ENOMEM; - fscrypt_get_devices(sb, num_devs, devs); + + devs = fscrypt_get_devices(sb, &num_devs); + if (IS_ERR(devs)) + return PTR_ERR(devs); for (i = 0; i < num_devs; i++) { - if (!blk_crypto_config_supported(devs[i], &crypto_cfg)) + if (!blk_crypto_config_supported(bdev_get_queue(devs[i]), + &crypto_cfg)) goto out_free_devs; } @@ -170,50 +168,42 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode; enum blk_crypto_key_type key_type = is_hw_wrapped ? BLK_CRYPTO_KEY_TYPE_HW_WRAPPED : BLK_CRYPTO_KEY_TYPE_STANDARD; - int num_devs = fscrypt_get_num_devices(sb); - int queue_refs = 0; - struct fscrypt_blk_crypto_key *blk_key; + struct blk_crypto_key *blk_key; + struct block_device **devs; + unsigned int num_devs; + unsigned int i; int err; - int i; - blk_key = kzalloc(struct_size(blk_key, devs, num_devs), GFP_KERNEL); + blk_key = kmalloc(sizeof(*blk_key), GFP_KERNEL); if (!blk_key) return -ENOMEM; - blk_key->num_devs = num_devs; - fscrypt_get_devices(sb, num_devs, blk_key->devs); - - err = blk_crypto_init_key(&blk_key->base, raw_key, raw_key_size, - key_type, crypto_mode, - fscrypt_get_dun_bytes(ci), sb->s_blocksize); + err = blk_crypto_init_key(blk_key, raw_key, raw_key_size, key_type, + crypto_mode, fscrypt_get_dun_bytes(ci), + sb->s_blocksize); if (err) { fscrypt_err(inode, "error %d initializing blk-crypto key", err); goto fail; } - /* - * We have to start using blk-crypto on all the filesystem's devices. - * We also have to save all the request_queue's for later so that the - * key can be evicted from them. This is needed because some keys - * aren't destroyed until after the filesystem was already unmounted - * (namely, the per-mode keys in struct fscrypt_master_key). 
- */ - for (i = 0; i < num_devs; i++) { - if (!blk_get_queue(blk_key->devs[i])) { - fscrypt_err(inode, "couldn't get request_queue"); - err = -EAGAIN; - goto fail; - } - queue_refs++; - - err = blk_crypto_start_using_key(&blk_key->base, - blk_key->devs[i]); - if (err) { - fscrypt_err(inode, - "error %d starting to use blk-crypto", err); - goto fail; - } + /* Start using blk-crypto on all the filesystem's block devices. */ + devs = fscrypt_get_devices(sb, &num_devs); + if (IS_ERR(devs)) { + err = PTR_ERR(devs); + goto fail; } + for (i = 0; i < num_devs; i++) { + err = blk_crypto_start_using_key(blk_key, + bdev_get_queue(devs[i])); + if (err) + break; + } + kfree(devs); + if (err) { + fscrypt_err(inode, "error %d starting to use blk-crypto", err); + goto fail; + } + /* * Pairs with the smp_load_acquire() in fscrypt_is_key_prepared(). * I.e., here we publish ->blk_key with a RELEASE barrier so that @@ -224,24 +214,29 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, return 0; fail: - for (i = 0; i < queue_refs; i++) - blk_put_queue(blk_key->devs[i]); kfree_sensitive(blk_key); return err; } -void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) +void fscrypt_destroy_inline_crypt_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key) { - struct fscrypt_blk_crypto_key *blk_key = prep_key->blk_key; - int i; + struct blk_crypto_key *blk_key = prep_key->blk_key; + struct block_device **devs; + unsigned int num_devs; + unsigned int i; - if (blk_key) { - for (i = 0; i < blk_key->num_devs; i++) { - blk_crypto_evict_key(blk_key->devs[i], &blk_key->base); - blk_put_queue(blk_key->devs[i]); - } - kfree_sensitive(blk_key); + if (!blk_key) + return; + + /* Evict the key from all the filesystem's block devices. 
*/ + devs = fscrypt_get_devices(sb, &num_devs); + if (!IS_ERR(devs)) { + for (i = 0; i < num_devs; i++) + blk_crypto_evict_key(bdev_get_queue(devs[i]), blk_key); + kfree(devs); } + kfree_sensitive(blk_key); } /* @@ -254,7 +249,10 @@ int fscrypt_derive_sw_secret(struct super_block *sb, const u8 *wrapped_key, u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]) { struct blk_crypto_profile *profile; - int num_devs; + struct block_device **devs; + unsigned int num_devs; + unsigned int i; + int err; /* The filesystem must be mounted with -o inlinecrypt */ if (!(sb->s_flags & SB_INLINECRYPT)) @@ -266,31 +264,22 @@ int fscrypt_derive_sw_secret(struct super_block *sb, const u8 *wrapped_key, * uses block devices with different crypto profiles. This way, there * is no ambiguity about which ->derive_sw_secret method to call. */ - profile = bdev_get_queue(sb->s_bdev)->crypto_profile; - num_devs = fscrypt_get_num_devices(sb); - if (num_devs > 1) { - struct request_queue **devs = - kmalloc_array(num_devs, sizeof(*devs), GFP_KERNEL); - int i; - - if (!devs) - return -ENOMEM; - - fscrypt_get_devices(sb, num_devs, devs); - - for (i = 0; i < num_devs; i++) { - if (devs[i]->crypto_profile != profile) { - fscrypt_warn(NULL, - "unsupported multi-device configuration for hardware-wrapped keys"); - kfree(devs); - return -EOPNOTSUPP; - } + devs = fscrypt_get_devices(sb, &num_devs); + if (IS_ERR(devs)) + return PTR_ERR(devs); + profile = bdev_get_queue(devs[0])->crypto_profile; + for (i = 1; i < num_devs; i++) { + if (bdev_get_queue(devs[i])->crypto_profile != profile) { + fscrypt_warn(NULL, + "unsupported multi-device configuration for hardware-wrapped keys"); + kfree(devs); + return -EOPNOTSUPP; } - kfree(devs); } - - return blk_crypto_derive_sw_secret(profile, wrapped_key, - wrapped_key_size, sw_secret); + err = blk_crypto_derive_sw_secret(profile, wrapped_key, + wrapped_key_size, sw_secret); + kfree(devs); + return err; } bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) @@ -345,7 
+334,7 @@ void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, ci = inode->i_crypt_info; fscrypt_generate_dun(ci, first_lblk, dun); - bio_crypt_set_ctx(bio, &ci->ci_enc_key.blk_key->base, dun, gfp_mask); + bio_crypt_set_ctx(bio, ci->ci_enc_key.blk_key, dun, gfp_mask); } EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx); @@ -438,7 +427,7 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, * uses the same pointer. I.e., there's currently no need to support * merging requests where the keys are the same but the pointers differ. */ - if (bc->bc_key != &inode->i_crypt_info->ci_enc_key.blk_key->base) + if (bc->bc_key != inode->i_crypt_info->ci_enc_key.blk_key) return false; fscrypt_generate_dun(inode->i_crypt_info, next_lblk, next_dun); @@ -471,46 +460,45 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio, EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh); /** - * fscrypt_dio_supported() - check whether a DIO (direct I/O) request is - * supported as far as encryption is concerned - * @iocb: the file and position the I/O is targeting - * @iter: the I/O data segment(s) + * fscrypt_dio_supported() - check whether DIO (direct I/O) is supported on an + * inode, as far as encryption is concerned + * @inode: the inode in question * * Return: %true if there are no encryption constraints that prevent DIO from * being supported; %false if DIO is unsupported. (Note that in the * %true case, the filesystem might have other, non-encryption-related - * constraints that prevent DIO from actually being supported.) + * constraints that prevent DIO from actually being supported. Also, on + * encrypted files the filesystem is still responsible for only allowing + * DIO when requests are filesystem-block-aligned.) 
*/ -bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter) +bool fscrypt_dio_supported(struct inode *inode) { - const struct inode *inode = file_inode(iocb->ki_filp); - const unsigned int blocksize = i_blocksize(inode); + int err; /* If the file is unencrypted, no veto from us. */ if (!fscrypt_needs_contents_encryption(inode)) return true; - /* We only support DIO with inline crypto, not fs-layer crypto. */ - if (!fscrypt_inode_uses_inline_crypto(inode)) - return false; - /* - * Since the granularity of encryption is filesystem blocks, the file - * position and total I/O length must be aligned to the filesystem block - * size -- not just to the block device's logical block size as is - * traditionally the case for DIO on many filesystems. + * We only support DIO with inline crypto, not fs-layer crypto. * - * We require that the user-provided memory buffers be filesystem block - * aligned too. It is simpler to have a single alignment value required - * for all properties of the I/O, as is normally the case for DIO. - * Also, allowing less aligned buffers would imply that data units could - * cross bvecs, which would greatly complicate the I/O stack, which - * assumes that bios can be split at any bvec boundary. + * To determine whether the inode is using inline crypto, we have to set + * up the key if it wasn't already done. This is because in the current + * design of fscrypt, the decision of whether to use inline crypto or + * not isn't made until the inode's encryption key is being set up. In + * the DIO read/write case, the key will always be set up already, since + * the file will be open. But in the case of statx(), the key might not + * be set up yet, as the file might not have been opened yet. */ - if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize)) + err = fscrypt_require_key(inode); + if (err) { + /* + * Key unavailable or couldn't be set up. This edge case isn't + * worth worrying about; just report that DIO is unsupported. 
+ */ return false; - - return true; + } + return fscrypt_inode_uses_inline_crypto(inode); } EXPORT_SYMBOL_GPL(fscrypt_dio_supported); diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index f954a19cd569..62e607b9b1e6 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -18,6 +18,7 @@ * information about these ioctls. */ +#include #include #include #include @@ -25,6 +26,18 @@ #include "fscrypt_private.h" +/* The master encryption keys for a filesystem (->s_master_keys) */ +struct fscrypt_keyring { + /* + * Lock that protects ->key_hashtable. It does *not* protect the + * fscrypt_master_key structs themselves. + */ + spinlock_t lock; + + /* Hash table that maps fscrypt_key_specifier to fscrypt_master_key */ + struct hlist_head key_hashtable[128]; +}; + static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret) { fscrypt_destroy_hkdf(&secret->hkdf); @@ -38,20 +51,73 @@ static void move_master_key_secret(struct fscrypt_master_key_secret *dst, memzero_explicit(src, sizeof(*src)); } -static void free_master_key(struct fscrypt_master_key *mk) +static void fscrypt_free_master_key(struct rcu_head *head) { + struct fscrypt_master_key *mk = + container_of(head, struct fscrypt_master_key, mk_rcu_head); + /* + * The master key secret and any embedded subkeys should have already + * been wiped when the last active reference to the fscrypt_master_key + * struct was dropped; doing it here would be unnecessarily late. + * Nevertheless, use kfree_sensitive() in case anything was missed. + */ + kfree_sensitive(mk); +} + +void fscrypt_put_master_key(struct fscrypt_master_key *mk) +{ + if (!refcount_dec_and_test(&mk->mk_struct_refs)) + return; + /* + * No structural references left, so free ->mk_users, and also free the + * fscrypt_master_key struct itself after an RCU grace period ensures + * that concurrent keyring lookups can no longer find it. 
+ */ + WARN_ON(refcount_read(&mk->mk_active_refs) != 0); + key_put(mk->mk_users); + mk->mk_users = NULL; + call_rcu(&mk->mk_rcu_head, fscrypt_free_master_key); +} + +void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk) +{ + struct super_block *sb = mk->mk_sb; + struct fscrypt_keyring *keyring = sb->s_master_keys; size_t i; - wipe_master_key_secret(&mk->mk_secret); + if (!refcount_dec_and_test(&mk->mk_active_refs)) + return; + /* + * No active references left, so complete the full removal of this + * fscrypt_master_key struct by removing it from the keyring and + * destroying any subkeys embedded in it. + */ + + spin_lock(&keyring->lock); + hlist_del_rcu(&mk->mk_node); + spin_unlock(&keyring->lock); + + /* + * ->mk_active_refs == 0 implies that ->mk_secret is not present and + * that ->mk_decrypted_inodes is empty. + */ + WARN_ON(is_master_key_secret_present(&mk->mk_secret)); + WARN_ON(!list_empty(&mk->mk_decrypted_inodes)); for (i = 0; i <= FSCRYPT_MODE_MAX; i++) { - fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]); - fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]); - fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]); + fscrypt_destroy_prepared_key( + sb, &mk->mk_direct_keys[i]); + fscrypt_destroy_prepared_key( + sb, &mk->mk_iv_ino_lblk_64_keys[i]); + fscrypt_destroy_prepared_key( + sb, &mk->mk_iv_ino_lblk_32_keys[i]); } + memzero_explicit(&mk->mk_ino_hash_key, + sizeof(mk->mk_ino_hash_key)); + mk->mk_ino_hash_key_initialized = false; - key_put(mk->mk_users); - kfree_sensitive(mk); + /* Drop the structural ref associated with the active refs. 
*/ + fscrypt_put_master_key(mk); } static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec) @@ -61,44 +127,6 @@ static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec) return master_key_spec_len(spec) != 0; } -static int fscrypt_key_instantiate(struct key *key, - struct key_preparsed_payload *prep) -{ - key->payload.data[0] = (struct fscrypt_master_key *)prep->data; - return 0; -} - -static void fscrypt_key_destroy(struct key *key) -{ - free_master_key(key->payload.data[0]); -} - -static void fscrypt_key_describe(const struct key *key, struct seq_file *m) -{ - seq_puts(m, key->description); - - if (key_is_positive(key)) { - const struct fscrypt_master_key *mk = key->payload.data[0]; - - if (!is_master_key_secret_present(&mk->mk_secret)) - seq_puts(m, ": secret removed"); - } -} - -/* - * Type of key in ->s_master_keys. Each key of this type represents a master - * key which has been added to the filesystem. Its payload is a - * 'struct fscrypt_master_key'. The "." prefix in the key type name prevents - * users from adding keys of this type via the keyrings syscalls rather than via - * the intended method of FS_IOC_ADD_ENCRYPTION_KEY. - */ -static struct key_type key_type_fscrypt = { - .name = "._fscrypt", - .instantiate = fscrypt_key_instantiate, - .destroy = fscrypt_key_destroy, - .describe = fscrypt_key_describe, -}; - static int fscrypt_user_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { @@ -131,32 +159,6 @@ static struct key_type key_type_fscrypt_user = { .describe = fscrypt_user_key_describe, }; -/* Search ->s_master_keys or ->mk_users */ -static struct key *search_fscrypt_keyring(struct key *keyring, - struct key_type *type, - const char *description) -{ - /* - * We need to mark the keyring reference as "possessed" so that we - * acquire permission to search it, via the KEY_POS_SEARCH permission. 
- */ - key_ref_t keyref = make_key_ref(keyring, true /* possessed */); - - keyref = keyring_search(keyref, type, description, false); - if (IS_ERR(keyref)) { - if (PTR_ERR(keyref) == -EAGAIN || /* not found */ - PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */ - keyref = ERR_PTR(-ENOKEY); - return ERR_CAST(keyref); - } - return key_ref_to_ptr(keyref); -} - -#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \ - (CONST_STRLEN("fscrypt-") + sizeof_field(struct super_block, s_id)) - -#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1) - #define FSCRYPT_MK_USERS_DESCRIPTION_SIZE \ (CONST_STRLEN("fscrypt-") + 2 * FSCRYPT_KEY_IDENTIFIER_SIZE + \ CONST_STRLEN("-users") + 1) @@ -164,21 +166,6 @@ static struct key *search_fscrypt_keyring(struct key *keyring, #define FSCRYPT_MK_USER_DESCRIPTION_SIZE \ (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + CONST_STRLEN(".uid.") + 10 + 1) -static void format_fs_keyring_description( - char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE], - const struct super_block *sb) -{ - sprintf(description, "fscrypt-%s", sb->s_id); -} - -static void format_mk_description( - char description[FSCRYPT_MK_DESCRIPTION_SIZE], - const struct fscrypt_key_specifier *mk_spec) -{ - sprintf(description, "%*phN", - master_key_spec_len(mk_spec), (u8 *)&mk_spec->u); -} - static void format_mk_users_keyring_description( char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE], const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) @@ -199,20 +186,15 @@ static void format_mk_user_description( /* Create ->s_master_keys if needed. Synchronized by fscrypt_add_key_mutex. 
*/ static int allocate_filesystem_keyring(struct super_block *sb) { - char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE]; - struct key *keyring; + struct fscrypt_keyring *keyring; if (sb->s_master_keys) return 0; - format_fs_keyring_description(description, sb); - keyring = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, - current_cred(), KEY_POS_SEARCH | - KEY_USR_SEARCH | KEY_USR_READ | KEY_USR_VIEW, - KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); - if (IS_ERR(keyring)) - return PTR_ERR(keyring); - + keyring = kzalloc(sizeof(*keyring), GFP_KERNEL); + if (!keyring) + return -ENOMEM; + spin_lock_init(&keyring->lock); /* * Pairs with the smp_load_acquire() in fscrypt_find_master_key(). * I.e., here we publish ->s_master_keys with a RELEASE barrier so that @@ -222,21 +204,75 @@ static int allocate_filesystem_keyring(struct super_block *sb) return 0; } -void fscrypt_sb_free(struct super_block *sb) +/* + * This is called at unmount time to release all encryption keys that have been + * added to the filesystem, along with the keyring that contains them. + * + * Note that besides clearing and freeing memory, this might need to evict keys + * from the keyslots of an inline crypto engine. Therefore, this must be called + * while the filesystem's underlying block device(s) are still available. + */ +void fscrypt_sb_delete(struct super_block *sb) { - key_put(sb->s_master_keys); + struct fscrypt_keyring *keyring = sb->s_master_keys; + size_t i; + + if (!keyring) + return; + + for (i = 0; i < ARRAY_SIZE(keyring->key_hashtable); i++) { + struct hlist_head *bucket = &keyring->key_hashtable[i]; + struct fscrypt_master_key *mk; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) { + /* + * Since all inodes were already evicted, every key + * remaining in the keyring should have an empty inode + * list, and should only still be in the keyring due to + * the single active ref associated with ->mk_secret. 
+ * There should be no structural refs beyond the one + * associated with the active ref. + */ + WARN_ON(refcount_read(&mk->mk_active_refs) != 1); + WARN_ON(refcount_read(&mk->mk_struct_refs) != 1); + WARN_ON(!is_master_key_secret_present(&mk->mk_secret)); + wipe_master_key_secret(&mk->mk_secret); + fscrypt_put_master_key_activeref(mk); + } + } + kfree_sensitive(keyring); sb->s_master_keys = NULL; } -/* - * Find the specified master key in ->s_master_keys. - * Returns ERR_PTR(-ENOKEY) if not found. - */ -struct key *fscrypt_find_master_key(struct super_block *sb, - const struct fscrypt_key_specifier *mk_spec) +static struct hlist_head * +fscrypt_mk_hash_bucket(struct fscrypt_keyring *keyring, + const struct fscrypt_key_specifier *mk_spec) { - struct key *keyring; - char description[FSCRYPT_MK_DESCRIPTION_SIZE]; + /* + * Since key specifiers should be "random" values, it is sufficient to + * use a trivial hash function that just takes the first several bits of + * the key specifier. + */ + unsigned long i = get_unaligned((unsigned long *)&mk_spec->u); + + return &keyring->key_hashtable[i % ARRAY_SIZE(keyring->key_hashtable)]; +} + +/* + * Find the specified master key struct in ->s_master_keys and take a structural + * ref to it. The structural ref guarantees that the key struct continues to + * exist, but it does *not* guarantee that ->s_master_keys continues to contain + * the key struct. The structural ref needs to be dropped by + * fscrypt_put_master_key(). Returns NULL if the key struct is not found. + */ +struct fscrypt_master_key * +fscrypt_find_master_key(struct super_block *sb, + const struct fscrypt_key_specifier *mk_spec) +{ + struct fscrypt_keyring *keyring; + struct hlist_head *bucket; + struct fscrypt_master_key *mk; /* * Pairs with the smp_store_release() in allocate_filesystem_keyring(). 
@@ -246,10 +282,38 @@ struct key *fscrypt_find_master_key(struct super_block *sb, */ keyring = smp_load_acquire(&sb->s_master_keys); if (keyring == NULL) - return ERR_PTR(-ENOKEY); /* No keyring yet, so no keys yet. */ + return NULL; /* No keyring yet, so no keys yet. */ - format_mk_description(description, mk_spec); - return search_fscrypt_keyring(keyring, &key_type_fscrypt, description); + bucket = fscrypt_mk_hash_bucket(keyring, mk_spec); + rcu_read_lock(); + switch (mk_spec->type) { + case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: + hlist_for_each_entry_rcu(mk, bucket, mk_node) { + if (mk->mk_spec.type == + FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && + memcmp(mk->mk_spec.u.descriptor, + mk_spec->u.descriptor, + FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 && + refcount_inc_not_zero(&mk->mk_struct_refs)) + goto out; + } + break; + case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: + hlist_for_each_entry_rcu(mk, bucket, mk_node) { + if (mk->mk_spec.type == + FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER && + memcmp(mk->mk_spec.u.identifier, + mk_spec->u.identifier, + FSCRYPT_KEY_IDENTIFIER_SIZE) == 0 && + refcount_inc_not_zero(&mk->mk_struct_refs)) + goto out; + } + break; + } + mk = NULL; +out: + rcu_read_unlock(); + return mk; } static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk) @@ -277,17 +341,30 @@ static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk) static struct key *find_master_key_user(struct fscrypt_master_key *mk) { char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE]; + key_ref_t keyref; format_mk_user_description(description, mk->mk_spec.u.identifier); - return search_fscrypt_keyring(mk->mk_users, &key_type_fscrypt_user, - description); + + /* + * We need to mark the keyring reference as "possessed" so that we + * acquire permission to search it, via the KEY_POS_SEARCH permission. 
+ */ + keyref = keyring_search(make_key_ref(mk->mk_users, true /*possessed*/), + &key_type_fscrypt_user, description, false); + if (IS_ERR(keyref)) { + if (PTR_ERR(keyref) == -EAGAIN || /* not found */ + PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */ + keyref = ERR_PTR(-ENOKEY); + return ERR_CAST(keyref); + } + return key_ref_to_ptr(keyref); } /* * Give the current user a "key" in ->mk_users. This charges the user's quota * and marks the master key as added by the current user, so that it cannot be - * removed by another user with the key. Either the master key's key->sem must - * be held for write, or the master key must be still undergoing initialization. + * removed by another user with the key. Either ->mk_sem must be held for + * write, or the master key must be still undergoing initialization. */ static int add_master_key_user(struct fscrypt_master_key *mk) { @@ -309,7 +386,7 @@ static int add_master_key_user(struct fscrypt_master_key *mk) /* * Remove the current user's "key" from ->mk_users. - * The master key's key->sem must be held for write. + * ->mk_sem must be held for write. * * Returns 0 if removed, -ENOKEY if not found, or another -errno code. */ @@ -327,63 +404,49 @@ static int remove_master_key_user(struct fscrypt_master_key *mk) } /* - * Allocate a new fscrypt_master_key which contains the given secret, set it as - * the payload of a new 'struct key' of type fscrypt, and link the 'struct key' - * into the given keyring. Synchronized by fscrypt_add_key_mutex. + * Allocate a new fscrypt_master_key, transfer the given secret over to it, and + * insert it into sb->s_master_keys. 
*/ -static int add_new_master_key(struct fscrypt_master_key_secret *secret, - const struct fscrypt_key_specifier *mk_spec, - struct key *keyring) +static int add_new_master_key(struct super_block *sb, + struct fscrypt_master_key_secret *secret, + const struct fscrypt_key_specifier *mk_spec) { + struct fscrypt_keyring *keyring = sb->s_master_keys; struct fscrypt_master_key *mk; - char description[FSCRYPT_MK_DESCRIPTION_SIZE]; - struct key *key; int err; mk = kzalloc(sizeof(*mk), GFP_KERNEL); if (!mk) return -ENOMEM; + mk->mk_sb = sb; + init_rwsem(&mk->mk_sem); + refcount_set(&mk->mk_struct_refs, 1); mk->mk_spec = *mk_spec; - move_master_key_secret(&mk->mk_secret, secret); - - refcount_set(&mk->mk_refcount, 1); /* secret is present */ INIT_LIST_HEAD(&mk->mk_decrypted_inodes); spin_lock_init(&mk->mk_decrypted_inodes_lock); if (mk_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) { err = allocate_master_key_users_keyring(mk); if (err) - goto out_free_mk; + goto out_put; err = add_master_key_user(mk); if (err) - goto out_free_mk; + goto out_put; } - /* - * Note that we don't charge this key to anyone's quota, since when - * ->mk_users is in use those keys are charged instead, and otherwise - * (when ->mk_users isn't in use) only root can add these keys. 
- */ - format_mk_description(description, mk_spec); - key = key_alloc(&key_type_fscrypt, description, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), - KEY_POS_SEARCH | KEY_USR_SEARCH | KEY_USR_VIEW, - KEY_ALLOC_NOT_IN_QUOTA, NULL); - if (IS_ERR(key)) { - err = PTR_ERR(key); - goto out_free_mk; - } - err = key_instantiate_and_link(key, mk, sizeof(*mk), keyring, NULL); - key_put(key); - if (err) - goto out_free_mk; + move_master_key_secret(&mk->mk_secret, secret); + refcount_set(&mk->mk_active_refs, 1); /* ->mk_secret is present */ + spin_lock(&keyring->lock); + hlist_add_head_rcu(&mk->mk_node, + fscrypt_mk_hash_bucket(keyring, mk_spec)); + spin_unlock(&keyring->lock); return 0; -out_free_mk: - free_master_key(mk); +out_put: + fscrypt_put_master_key(mk); return err; } @@ -392,42 +455,34 @@ out_free_mk: static int add_existing_master_key(struct fscrypt_master_key *mk, struct fscrypt_master_key_secret *secret) { - struct key *mk_user; - bool rekey; int err; /* * If the current user is already in ->mk_users, then there's nothing to - * do. (Not applicable for v1 policy keys, which have NULL ->mk_users.) + * do. Otherwise, we need to add the user to ->mk_users. (Neither is + * applicable for v1 policy keys, which have NULL ->mk_users.) */ if (mk->mk_users) { - mk_user = find_master_key_user(mk); + struct key *mk_user = find_master_key_user(mk); + if (mk_user != ERR_PTR(-ENOKEY)) { if (IS_ERR(mk_user)) return PTR_ERR(mk_user); key_put(mk_user); return 0; } - } - - /* If we'll be re-adding ->mk_secret, try to take the reference. */ - rekey = !is_master_key_secret_present(&mk->mk_secret); - if (rekey && !refcount_inc_not_zero(&mk->mk_refcount)) - return KEY_DEAD; - - /* Add the current user to ->mk_users, if applicable. */ - if (mk->mk_users) { err = add_master_key_user(mk); - if (err) { - if (rekey && refcount_dec_and_test(&mk->mk_refcount)) - return KEY_DEAD; + if (err) return err; - } } /* Re-add the secret if needed. 
*/ - if (rekey) + if (!is_master_key_secret_present(&mk->mk_secret)) { + if (!refcount_inc_not_zero(&mk->mk_active_refs)) + return KEY_DEAD; move_master_key_secret(&mk->mk_secret, secret); + } + return 0; } @@ -436,38 +491,36 @@ static int do_add_master_key(struct super_block *sb, const struct fscrypt_key_specifier *mk_spec) { static DEFINE_MUTEX(fscrypt_add_key_mutex); - struct key *key; + struct fscrypt_master_key *mk; int err; mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */ -retry: - key = fscrypt_find_master_key(sb, mk_spec); - if (IS_ERR(key)) { - err = PTR_ERR(key); - if (err != -ENOKEY) - goto out_unlock; + + mk = fscrypt_find_master_key(sb, mk_spec); + if (!mk) { /* Didn't find the key in ->s_master_keys. Add it. */ err = allocate_filesystem_keyring(sb); - if (err) - goto out_unlock; - err = add_new_master_key(secret, mk_spec, sb->s_master_keys); + if (!err) + err = add_new_master_key(sb, secret, mk_spec); } else { /* * Found the key in ->s_master_keys. Re-add the secret if * needed, and add the user to ->mk_users if needed. */ - down_write(&key->sem); - err = add_existing_master_key(key->payload.data[0], secret); - up_write(&key->sem); + down_write(&mk->mk_sem); + err = add_existing_master_key(mk, secret); + up_write(&mk->mk_sem); if (err == KEY_DEAD) { - /* Key being removed or needs to be removed */ - key_invalidate(key); - key_put(key); - goto retry; + /* + * We found a key struct, but it's already been fully + * removed. Ignore the old struct and add a new one. + * fscrypt_add_key_mutex means we don't need to worry + * about concurrent adds. 
+ */ + err = add_new_master_key(sb, secret, mk_spec); } - key_put(key); + fscrypt_put_master_key(mk); } -out_unlock: mutex_unlock(&fscrypt_add_key_mutex); return err; } @@ -802,19 +855,19 @@ int fscrypt_verify_key_added(struct super_block *sb, const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) { struct fscrypt_key_specifier mk_spec; - struct key *key, *mk_user; struct fscrypt_master_key *mk; + struct key *mk_user; int err; mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER; memcpy(mk_spec.u.identifier, identifier, FSCRYPT_KEY_IDENTIFIER_SIZE); - key = fscrypt_find_master_key(sb, &mk_spec); - if (IS_ERR(key)) { - err = PTR_ERR(key); + mk = fscrypt_find_master_key(sb, &mk_spec); + if (!mk) { + err = -ENOKEY; goto out; } - mk = key->payload.data[0]; + down_read(&mk->mk_sem); mk_user = find_master_key_user(mk); if (IS_ERR(mk_user)) { err = PTR_ERR(mk_user); @@ -822,7 +875,8 @@ int fscrypt_verify_key_added(struct super_block *sb, key_put(mk_user); err = 0; } - key_put(key); + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); out: if (err == -ENOKEY && capable(CAP_FOWNER)) err = 0; @@ -984,11 +1038,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) struct super_block *sb = file_inode(filp)->i_sb; struct fscrypt_remove_key_arg __user *uarg = _uarg; struct fscrypt_remove_key_arg arg; - struct key *key; struct fscrypt_master_key *mk; u32 status_flags = 0; int err; - bool dead; + bool inodes_remain; if (copy_from_user(&arg, uarg, sizeof(arg))) return -EFAULT; @@ -1008,12 +1061,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) return -EACCES; /* Find the key being removed. 
*/ - key = fscrypt_find_master_key(sb, &arg.key_spec); - if (IS_ERR(key)) - return PTR_ERR(key); - mk = key->payload.data[0]; - - down_write(&key->sem); + mk = fscrypt_find_master_key(sb, &arg.key_spec); + if (!mk) + return -ENOKEY; + down_write(&mk->mk_sem); /* If relevant, remove current user's (or all users) claim to the key */ if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) { @@ -1022,7 +1073,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) else err = remove_master_key_user(mk); if (err) { - up_write(&key->sem); + up_write(&mk->mk_sem); goto out_put_key; } if (mk->mk_users->keys.nr_leaves_on_tree != 0) { @@ -1034,26 +1085,22 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) status_flags |= FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS; err = 0; - up_write(&key->sem); + up_write(&mk->mk_sem); goto out_put_key; } } /* No user claims remaining. Go ahead and wipe the secret. */ - dead = false; + err = -ENOKEY; if (is_master_key_secret_present(&mk->mk_secret)) { wipe_master_key_secret(&mk->mk_secret); - dead = refcount_dec_and_test(&mk->mk_refcount); - } - up_write(&key->sem); - if (dead) { - /* - * No inodes reference the key, and we wiped the secret, so the - * key object is free to be removed from the keyring. - */ - key_invalidate(key); + fscrypt_put_master_key_activeref(mk); err = 0; - } else { + } + inodes_remain = refcount_read(&mk->mk_active_refs) > 0; + up_write(&mk->mk_sem); + + if (inodes_remain) { /* Some inodes still reference this key; try to evict them. */ err = try_to_lock_encrypted_files(sb, mk); if (err == -EBUSY) { @@ -1069,7 +1116,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) * has been fully removed including all files locked. 
*/ out_put_key: - key_put(key); + fscrypt_put_master_key(mk); if (err == 0) err = put_user(status_flags, &uarg->removal_status_flags); return err; @@ -1116,7 +1163,6 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) { struct super_block *sb = file_inode(filp)->i_sb; struct fscrypt_get_key_status_arg arg; - struct key *key; struct fscrypt_master_key *mk; int err; @@ -1133,19 +1179,18 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) arg.user_count = 0; memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved)); - key = fscrypt_find_master_key(sb, &arg.key_spec); - if (IS_ERR(key)) { - if (key != ERR_PTR(-ENOKEY)) - return PTR_ERR(key); + mk = fscrypt_find_master_key(sb, &arg.key_spec); + if (!mk) { arg.status = FSCRYPT_KEY_STATUS_ABSENT; err = 0; goto out; } - mk = key->payload.data[0]; - down_read(&key->sem); + down_read(&mk->mk_sem); if (!is_master_key_secret_present(&mk->mk_secret)) { - arg.status = FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED; + arg.status = refcount_read(&mk->mk_active_refs) > 0 ? 
+ FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED : + FSCRYPT_KEY_STATUS_ABSENT /* raced with full removal */; err = 0; goto out_release_key; } @@ -1167,8 +1212,8 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) } err = 0; out_release_key: - up_read(&key->sem); - key_put(key); + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); out: if (!err && copy_to_user(uarg, &arg, sizeof(arg))) err = -EFAULT; @@ -1180,13 +1225,9 @@ int __init fscrypt_init_keyring(void) { int err; - err = register_key_type(&key_type_fscrypt); - if (err) - return err; - err = register_key_type(&key_type_fscrypt_user); if (err) - goto err_unregister_fscrypt; + return err; err = register_key_type(&key_type_fscrypt_provisioning); if (err) @@ -1196,7 +1237,5 @@ int __init fscrypt_init_keyring(void) err_unregister_fscrypt_user: unregister_key_type(&key_type_fscrypt_user); -err_unregister_fscrypt: - unregister_key_type(&key_type_fscrypt); return err; } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 07c2eb7ed43f..8b0237241c2f 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -9,7 +9,6 @@ */ #include -#include #include #include "fscrypt_private.h" @@ -170,10 +169,12 @@ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, } /* Destroy a crypto transform object and/or blk-crypto key. */ -void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) +void fscrypt_destroy_prepared_key(struct super_block *sb, + struct fscrypt_prepared_key *prep_key) { crypto_free_skcipher(prep_key->tfm); - fscrypt_destroy_inline_crypt_key(prep_key); + fscrypt_destroy_inline_crypt_key(sb, prep_key); + memzero_explicit(prep_key, sizeof(*prep_key)); } /* Given a per-file encryption key, set up the file's crypto transform object */ @@ -458,31 +459,28 @@ static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk, /* * Find the master key, then set up the inode's actual encryption key. 
* - * If the master key is found in the filesystem-level keyring, then the - * corresponding 'struct key' is returned in *master_key_ret with its semaphore - * read-locked. This is needed to ensure that only one task links the - * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create - * an fscrypt_info for the same inode), and to synchronize the master key being - * removed with a new inode starting to use it. + * If the master key is found in the filesystem-level keyring, then it is + * returned in *mk_ret with its semaphore read-locked. This is needed to ensure + * that only one task links the fscrypt_info into ->mk_decrypted_inodes (as + * multiple tasks may race to create an fscrypt_info for the same inode), and to + * synchronize the master key being removed with a new inode starting to use it. */ static int setup_file_encryption_key(struct fscrypt_info *ci, bool need_dirhash_key, - struct key **master_key_ret) + struct fscrypt_master_key **mk_ret) { - struct key *key; - struct fscrypt_master_key *mk = NULL; struct fscrypt_key_specifier mk_spec; + struct fscrypt_master_key *mk; int err; err = fscrypt_policy_to_key_spec(&ci->ci_policy, &mk_spec); if (err) return err; - key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec); - if (IS_ERR(key)) { - if (key != ERR_PTR(-ENOKEY) || - ci->ci_policy.version != FSCRYPT_POLICY_V1) - return PTR_ERR(key); + mk = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec); + if (!mk) { + if (ci->ci_policy.version != FSCRYPT_POLICY_V1) + return -ENOKEY; err = fscrypt_select_encryption_impl(ci, false); if (err) @@ -496,9 +494,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, */ return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci); } - - mk = key->payload.data[0]; - down_read(&key->sem); + down_read(&mk->mk_sem); /* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? 
*/ if (!is_master_key_secret_present(&mk->mk_secret)) { @@ -538,18 +534,18 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, if (err) goto out_release_key; - *master_key_ret = key; + *mk_ret = mk; return 0; out_release_key: - up_read(&key->sem); - key_put(key); + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); return err; } static void put_crypt_info(struct fscrypt_info *ci) { - struct key *key; + struct fscrypt_master_key *mk; if (!ci) return; @@ -557,26 +553,21 @@ static void put_crypt_info(struct fscrypt_info *ci) if (ci->ci_direct_key) fscrypt_put_direct_key(ci->ci_direct_key); else if (ci->ci_owns_key) - fscrypt_destroy_prepared_key(&ci->ci_enc_key); - - key = ci->ci_master_key; - if (key) { - struct fscrypt_master_key *mk = key->payload.data[0]; + fscrypt_destroy_prepared_key(ci->ci_inode->i_sb, + &ci->ci_enc_key); + mk = ci->ci_master_key; + if (mk) { /* * Remove this inode from the list of inodes that were unlocked - * with the master key. - * - * In addition, if we're removing the last inode from a key that - * already had its secret removed, invalidate the key so that it - * gets removed from ->s_master_keys. + * with the master key. In addition, if we're removing the last + * inode from a master key struct that already had its secret + * removed, then complete the full removal of the struct. 
*/ spin_lock(&mk->mk_decrypted_inodes_lock); list_del(&ci->ci_master_key_link); spin_unlock(&mk->mk_decrypted_inodes_lock); - if (refcount_dec_and_test(&mk->mk_refcount)) - key_invalidate(key); - key_put(key); + fscrypt_put_master_key_activeref(mk); } memzero_explicit(ci, sizeof(*ci)); kmem_cache_free(fscrypt_info_cachep, ci); @@ -590,7 +581,7 @@ fscrypt_setup_encryption_info(struct inode *inode, { struct fscrypt_info *crypt_info; struct fscrypt_mode *mode; - struct key *master_key = NULL; + struct fscrypt_master_key *mk = NULL; int res; res = fscrypt_initialize(inode->i_sb->s_cop->flags); @@ -613,8 +604,7 @@ fscrypt_setup_encryption_info(struct inode *inode, WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE); crypt_info->ci_mode = mode; - res = setup_file_encryption_key(crypt_info, need_dirhash_key, - &master_key); + res = setup_file_encryption_key(crypt_info, need_dirhash_key, &mk); if (res) goto out; @@ -629,12 +619,9 @@ fscrypt_setup_encryption_info(struct inode *inode, * We won the race and set ->i_crypt_info to our crypt_info. * Now link it into the master key's inode list. 
*/ - if (master_key) { - struct fscrypt_master_key *mk = - master_key->payload.data[0]; - - refcount_inc(&mk->mk_refcount); - crypt_info->ci_master_key = key_get(master_key); + if (mk) { + crypt_info->ci_master_key = mk; + refcount_inc(&mk->mk_active_refs); spin_lock(&mk->mk_decrypted_inodes_lock); list_add(&crypt_info->ci_master_key_link, &mk->mk_decrypted_inodes); @@ -644,9 +631,9 @@ fscrypt_setup_encryption_info(struct inode *inode, } res = 0; out: - if (master_key) { - up_read(&master_key->sem); - key_put(master_key); + if (mk) { + up_read(&mk->mk_sem); + fscrypt_put_master_key(mk); } put_crypt_info(crypt_info); return res; @@ -811,7 +798,6 @@ EXPORT_SYMBOL(fscrypt_free_inode); int fscrypt_drop_inode(struct inode *inode) { const struct fscrypt_info *ci = fscrypt_get_info(inode); - const struct fscrypt_master_key *mk; /* * If ci is NULL, then the inode doesn't have an encryption key set up @@ -821,7 +807,6 @@ int fscrypt_drop_inode(struct inode *inode) */ if (!ci || !ci->ci_master_key) return 0; - mk = ci->ci_master_key->payload.data[0]; /* * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes @@ -840,6 +825,6 @@ int fscrypt_drop_inode(struct inode *inode) * then the thread removing the key will either evict the inode itself * or will correctly detect that it wasn't evicted due to the race. 
*/ - return !is_master_key_secret_present(&mk->mk_secret); + return !is_master_key_secret_present(&ci->ci_master_key->mk_secret); } EXPORT_SYMBOL_GPL(fscrypt_drop_inode); diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index b2f9031de2c0..ee8246382805 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -144,6 +144,7 @@ invalid: /* Master key referenced by DIRECT_KEY policy */ struct fscrypt_direct_key { + struct super_block *dk_sb; struct hlist_node dk_node; refcount_t dk_refcount; const struct fscrypt_mode *dk_mode; @@ -155,7 +156,7 @@ struct fscrypt_direct_key { static void free_direct_key(struct fscrypt_direct_key *dk) { if (dk) { - fscrypt_destroy_prepared_key(&dk->dk_key); + fscrypt_destroy_prepared_key(dk->dk_sb, &dk->dk_key); kfree_sensitive(dk); } } @@ -232,6 +233,7 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) dk = kzalloc(sizeof(*dk), GFP_KERNEL); if (!dk) return ERR_PTR(-ENOMEM); + dk->dk_sb = ci->ci_inode->i_sb; refcount_set(&dk->dk_refcount, 1); dk->dk_mode = ci->ci_mode; err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci); diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 8a054e6d1e68..b1f3427c4b46 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -721,12 +721,8 @@ int fscrypt_set_context(struct inode *inode, void *fs_data) * delayed key setup that requires the inode number. 
*/ if (ci->ci_policy.version == FSCRYPT_POLICY_V2 && - (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) { - const struct fscrypt_master_key *mk = - ci->ci_master_key->payload.data[0]; - - fscrypt_hash_inode_number(ci, mk); - } + (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) + fscrypt_hash_inode_number(ci, ci->ci_master_key); return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, fs_data); } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b5efe92b5c6d..67b00bf562da 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -40,8 +40,13 @@ static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter) { struct inode *inode = file_inode(iocb->ki_filp); - if (!fscrypt_dio_supported(iocb, iter)) - return false; + if (IS_ENCRYPTED(inode)) { + if (!fscrypt_dio_supported(inode)) + return false; + if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), + i_blocksize(inode))) + return false; + } if (fsverity_active(inode)) return false; if (ext4_should_journal_data(inode)) diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 3db923403505..35b1894eef3b 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -76,7 +76,7 @@ static void __read_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, iter_all) { page = bv->bv_page; - /* PG_error was set if any post_read step failed */ + /* PG_error was set if verity failed. 
*/ if (bio->bi_status || PageError(page)) { ClearPageUptodate(page); /* will re-read again later */ @@ -97,10 +97,12 @@ static void decrypt_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; - fscrypt_decrypt_bio(ctx->bio); - - bio_post_read_processing(ctx); + if (fscrypt_decrypt_bio(bio)) + bio_post_read_processing(ctx); + else + __read_end_io(bio); } static void verity_work(struct work_struct *work) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index af9fd6cafa2b..e31e64d36351 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -26,12 +26,16 @@ static struct kmem_cache *ino_entry_slab; struct kmem_cache *f2fs_inode_entry_slab; -void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) +void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, + unsigned char reason) { f2fs_build_fault_attr(sbi, 0, 0); set_ckpt_flags(sbi, CP_ERROR_FLAG); - if (!end_io) + if (!end_io) { f2fs_flush_merged_writes(sbi); + + f2fs_handle_stop(sbi, reason); + } } /* @@ -89,7 +93,7 @@ repeat: return ERR_PTR(err); } - f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, NULL, FS_META_READ_IO, F2FS_BLKSIZE); lock_page(page); if (unlikely(page->mapping != mapping)) { @@ -122,7 +126,7 @@ retry: if (PTR_ERR(page) == -EIO && ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_META_PAGE); } return page; } @@ -140,7 +144,7 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, unsigned int segno, offset; bool exist; - if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ) + if (type == DATA_GENERIC) return true; segno = GET_SEGNO(sbi, blkaddr); @@ -148,6 +152,13 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, se = get_seg_entry(sbi, segno); exist = f2fs_test_bit(offset, se->cur_valid_map); + if 
(exist && type == DATA_GENERIC_ENHANCE_UPDATE) { + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return exist; + } + if (!exist && type == DATA_GENERIC_ENHANCE) { f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", blkaddr, exist); @@ -185,6 +196,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case DATA_GENERIC: case DATA_GENERIC_ENHANCE: case DATA_GENERIC_ENHANCE_READ: + case DATA_GENERIC_ENHANCE_UPDATE: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) { f2fs_warn(sbi, "access invalid blkaddr:%u", @@ -276,7 +288,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, f2fs_put_page(page, err ? 1 : 0); if (!err) - f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, NULL, FS_META_READ_IO, + F2FS_BLKSIZE); } out: blk_finish_plug(&plug); @@ -447,8 +460,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); - if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + if (__set_page_dirty_nobuffers(page)) { inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); set_page_private_reference(page); return 1; @@ -1054,7 +1066,8 @@ void f2fs_remove_dirty_inode(struct inode *inode) spin_unlock(&sbi->inode_lock[type]); } -int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type, + bool from_cp) { struct list_head *head; struct inode *inode; @@ -1089,11 +1102,15 @@ retry: if (inode) { unsigned long cur_ino = inode->i_ino; - F2FS_I(inode)->cp_task = current; + if (from_cp) + F2FS_I(inode)->cp_task = current; + F2FS_I(inode)->wb_task = current; filemap_fdatawrite(inode->i_mapping); - F2FS_I(inode)->cp_task = NULL; + F2FS_I(inode)->wb_task = NULL; + if (from_cp) + F2FS_I(inode)->cp_task = NULL; iput(inode); /* We need to give cpu to another writers. 
*/ @@ -1222,7 +1239,7 @@ retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); - err = f2fs_sync_dirty_inodes(sbi, DIR_INODE); + err = f2fs_sync_dirty_inodes(sbi, DIR_INODE, true); if (err) return err; cond_resched(); @@ -1893,15 +1910,27 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi) void f2fs_stop_ckpt_thread(struct f2fs_sb_info *sbi) { struct ckpt_req_control *cprc = &sbi->cprc_info; + struct task_struct *ckpt_task; - if (cprc->f2fs_issue_ckpt) { - struct task_struct *ckpt_task = cprc->f2fs_issue_ckpt; + if (!cprc->f2fs_issue_ckpt) + return; - cprc->f2fs_issue_ckpt = NULL; - kthread_stop(ckpt_task); + ckpt_task = cprc->f2fs_issue_ckpt; + cprc->f2fs_issue_ckpt = NULL; + kthread_stop(ckpt_task); - flush_remained_ckpt_reqs(sbi, NULL); - } + f2fs_flush_ckpt_thread(sbi); +} + +void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi) +{ + struct ckpt_req_control *cprc = &sbi->cprc_info; + + flush_remained_ckpt_reqs(sbi, NULL); + + /* Let's wait for the previous dispatched checkpoint. 
*/ + while (atomic_read(&cprc->queued_ckpt)) + io_schedule_timeout(DEFAULT_IO_TIMEOUT); } void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 24f3a8807195..d5d199579dda 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -761,6 +761,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) { ret = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION); goto out_release; } @@ -911,17 +912,15 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) reason = "[C|*|C|*]"; goto out; } - if (compressed) { - if (!__is_valid_data_blkaddr(blkaddr)) { - if (!cluster_end) - cluster_end = i; - continue; - } - /* [COMPR_ADDR, NULL_ADDR or NEW_ADDR, valid_blkaddr] */ - if (cluster_end) { - reason = "[C|N|N|V]"; - goto out; - } + if (!__is_valid_data_blkaddr(blkaddr)) { + if (!cluster_end) + cluster_end = i; + continue; + } + /* [COMPR_ADDR, NULL_ADDR or NEW_ADDR, valid_blkaddr] */ + if (cluster_end) { + reason = "[C|N|N|V]"; + goto out; } } return false; @@ -951,6 +950,7 @@ static int __f2fs_cluster_blocks(struct inode *inode, if (f2fs_sanity_check_cluster(&dn)) { ret = -EFSCORRUPTED; + f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_CLUSTER); goto fail; } @@ -1567,12 +1567,8 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, if (!dic->cbuf) return -ENOMEM; - if (cops->init_decompress_ctx) { - int ret = cops->init_decompress_ctx(dic); - - if (ret) - return ret; - } + if (cops->init_decompress_ctx) + return cops->init_decompress_ctx(dic); return 0; } @@ -1904,7 +1900,7 @@ bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page, void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { - struct address_space *mapping = sbi->compress_inode->i_mapping; + struct address_space *mapping = COMPRESS_MAPPING(sbi); struct pagevec pvec; pgoff_t index = 0; 
pgoff_t end = MAX_BLKADDR(sbi); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c1c0b04acbb4..130277a04cd6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -140,7 +140,7 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) continue; } - /* PG_error was set if decryption or verity failed. */ + /* PG_error was set if verity failed. */ if (bio->bi_status || PageError(page)) { ClearPageUptodate(page); /* will re-read again later */ @@ -186,7 +186,7 @@ static void f2fs_verify_bio(struct work_struct *work) struct page *page = bv->bv_page; if (!f2fs_is_compressed_page(page) && - !PageError(page) && !fsverity_verify_page(page)) + !fsverity_verify_page(page)) SetPageError(page); } } else { @@ -237,10 +237,9 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx, bio_for_each_segment_all(bv, ctx->bio, iter_all) { struct page *page = bv->bv_page; - /* PG_error was set if decryption failed. */ if (f2fs_is_compressed_page(page)) - f2fs_end_read_compressed_page(page, PageError(page), - blkaddr, in_task); + f2fs_end_read_compressed_page(page, false, blkaddr, + in_task); else all_compressed = false; @@ -260,14 +259,17 @@ static void f2fs_post_read_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; - if (ctx->enabled_steps & STEP_DECRYPT) - fscrypt_decrypt_bio(ctx->bio); + if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) { + f2fs_finish_read_bio(bio, true); + return; + } if (ctx->enabled_steps & STEP_DECOMPRESS) f2fs_handle_step_decompress(ctx, true); - f2fs_verify_and_finish_bio(ctx->bio, true); + f2fs_verify_and_finish_bio(bio, true); } static void f2fs_read_end_io(struct bio *bio) @@ -334,7 +336,8 @@ static void f2fs_write_end_io(struct bio *bio) mempool_free(page, sbi->write_io_dummy); if (unlikely(bio->bi_status)) - f2fs_stop_checkpoint(sbi, true); + f2fs_stop_checkpoint(sbi, true, + STOP_CP_REASON_WRITE_FAIL); continue; } 
@@ -350,7 +353,8 @@ static void f2fs_write_end_io(struct bio *bio) if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); if (type == F2FS_WB_CP_DATA) - f2fs_stop_checkpoint(sbi, true); + f2fs_stop_checkpoint(sbi, true, + STOP_CP_REASON_WRITE_FAIL); } f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && @@ -708,8 +712,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? - META_GENERIC : DATA_GENERIC_ENHANCE))) + META_GENERIC : DATA_GENERIC_ENHANCE))) { + f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; + } trace_f2fs_submit_page_bio(page, fio); @@ -728,7 +734,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(page): WB_DATA_TYPE(fio->page)); + __read_io_type(page) : WB_DATA_TYPE(fio->page)); __submit_bio(fio->sbi, bio, fio->type); return 0; @@ -909,8 +915,10 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) fio->encrypted_page : fio->page; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, - __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + __is_meta_io(fio) ? 
META_GENERIC : DATA_GENERIC)) { + f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; + } trace_f2fs_submit_page_bio(page, fio); @@ -1088,7 +1096,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } ClearPageError(page); inc_page_count(sbi, F2FS_RD_DATA); - f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE); __submit_bio(sbi, bio, DATA); return 0; } @@ -1220,6 +1228,8 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; + f2fs_handle_error(F2FS_I_SB(inode), + ERROR_INVALID_BLKADDR); goto put_err; } goto got_it; @@ -1240,6 +1250,8 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, dn.data_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; + f2fs_handle_error(F2FS_I_SB(inode), + ERROR_INVALID_BLKADDR); goto put_err; } got_it: @@ -1553,6 +1565,7 @@ next_block: if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto sync_out; } @@ -1598,6 +1611,8 @@ next_block: (flag != F2FS_GET_BLOCK_FIEMAP || IS_ENABLED(CONFIG_F2FS_CHECK_FS))) { err = -EFSCORRUPTED; + f2fs_handle_error(sbi, + ERROR_CORRUPTED_CLUSTER); goto sync_out; } if (flag == F2FS_GET_BLOCK_BMAP) { @@ -1821,7 +1836,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); trace_f2fs_fiemap(inode, 0, phys, len, flags, err); - if (err || err == 1) + if (err) return err; } @@ -2085,6 +2100,8 @@ got_it: if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { ret = -EFSCORRUPTED; + f2fs_handle_error(F2FS_I_SB(inode), + ERROR_INVALID_BLKADDR); goto out; } } else { @@ -2133,7 +2150,8 @@ submit_and_realloc: goto submit_and_realloc; 
inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); - f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO, + F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = block_nr; goto out; @@ -2287,8 +2305,7 @@ submit_and_realloc: refcount_inc(&dic->refcnt); inc_page_count(sbi, F2FS_RD_DATA); - f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); - f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = blkaddr; } @@ -2559,7 +2576,7 @@ bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) return true; /* if this is cold file, we should overwrite to avoid fragmentation */ - if (file_is_cold(inode)) + if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE)) return true; return check_inplace_update_policy(inode, fio); @@ -2633,8 +2650,11 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) fio->old_blkaddr = ei.blk + page->index - ei.fofs; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC_ENHANCE)) + DATA_GENERIC_ENHANCE)) { + f2fs_handle_error(fio->sbi, + ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; + } ipu_force = true; fio->need_lock = LOCK_DONE; @@ -2662,6 +2682,7 @@ got_it: !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; + f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); goto out_writepage; } @@ -2872,7 +2893,7 @@ out: } unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task && allow_balance) + !F2FS_I(inode)->wb_task && allow_balance) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { @@ -3172,7 +3193,7 @@ static inline bool __should_serialize_io(struct inode *inode, struct writeback_control *wbc) { /* to avoid deadlock in path of data flush */ - if (F2FS_I(inode)->cp_task) + if (F2FS_I(inode)->wb_task) 
return false; if (!S_ISREG(inode->i_mode)) @@ -3576,6 +3597,7 @@ repeat: if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto fail; } err = f2fs_submit_page_read(inode, page, blkaddr, 0, true); @@ -3714,8 +3736,7 @@ static int f2fs_set_data_page_dirty(struct page *page) if (PageSwapCache(page)) return __set_page_dirty_nobuffers(page); - if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + if (__set_page_dirty_nobuffers(page)) { f2fs_update_dirty_page(inode, page); return 1; } @@ -4023,6 +4044,7 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (ret < 0) return ret; + stat_inc_swapfile_inode(inode); set_inode_flag(inode, FI_PIN_FILE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return ret; @@ -4032,6 +4054,7 @@ static void f2fs_swap_deactivate(struct file *file) { struct inode *inode = file_inode(file); + stat_dec_swapfile_inode(inode); clear_inode_flag(inode, FI_PIN_FILE); } #else diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index c01471573977..a216dcdf6941 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -91,7 +91,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; si->nquota_files = sbi->nquota_files; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; - si->aw_cnt = sbi->atomic_files; + si->aw_cnt = atomic_read(&sbi->atomic_files); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ); si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE); @@ -135,6 +135,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->inline_inode = atomic_read(&sbi->inline_inode); si->inline_dir = atomic_read(&sbi->inline_dir); si->compr_inode = atomic_read(&sbi->compr_inode); + si->swapfile_inode = atomic_read(&sbi->swapfile_inode); si->compr_blocks = atomic64_read(&sbi->compr_blocks); si->append = sbi->im[APPEND_INO].ino_num; 
si->update = sbi->im[UPDATE_INO].ino_num; @@ -347,7 +348,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n", si->sbi->sb->s_bdev, i++, - f2fs_readonly(si->sbi->sb) ? "RO": "RW", + f2fs_readonly(si->sbi->sb) ? "RO" : "RW", is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ? "Disabled" : (f2fs_cp_error(si->sbi) ? "Error" : "Good")); if (si->sbi->s_flag) { @@ -385,6 +386,8 @@ static int stat_show(struct seq_file *s, void *v) si->inline_dir); seq_printf(s, " - Compressed Inode: %u, Blocks: %llu\n", si->compr_inode, si->compr_blocks); + seq_printf(s, " - Swapfile Inode: %u\n", + si->swapfile_inode); seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n", si->orphans, si->append, si->update); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", @@ -607,6 +610,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_dir, 0); atomic_set(&sbi->compr_inode, 0); atomic64_set(&sbi->compr_blocks, 0); + atomic_set(&sbi->swapfile_inode, 0); + atomic_set(&sbi->atomic_files, 0); atomic_set(&sbi->inplace_count, 0); for (i = META_CP; i < META_MAX; i++) atomic_set(&sbi->meta_count[i], 0); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 0a3b6303363d..4d50ba2e0b0b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1041,6 +1041,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, __func__, le16_to_cpu(de->name_len)); set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_CORRUPTED_DIRENT); goto out; } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 866e72b29bd5..932c070173b9 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -544,7 +544,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (!et) return; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len); + trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len, 0); write_lock(&et->lock); @@ -583,7 +583,7 @@ 
static void f2fs_update_extent_tree_range(struct inode *inode, org_end = dei.fofs + dei.len; f2fs_bug_on(sbi, pos >= org_end); - if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) { + if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) { en->ei.len = pos - en->ei.fofs; prev_en = en; parts = 1; @@ -675,7 +675,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode, struct rb_node **insert_p = NULL, *insert_parent = NULL; bool leftmost = false; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen); + trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen, c_len); /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */ if (is_inode_flag_set(inode, FI_NO_EXTENT)) @@ -804,9 +804,8 @@ void f2fs_drop_extent_tree(struct inode *inode) if (!f2fs_may_extent_tree(inode)) return; - set_inode_flag(inode, FI_NO_EXTENT); - write_lock(&et->lock); + set_inode_flag(inode, FI_NO_EXTENT); __free_extent_tree(sbi, et); if (et->largest.len) { et->largest.len = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 783dafc6f40c..409bcf3a7035 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -263,6 +263,10 @@ enum { * condition of read on truncated area * by extent_cache */ + DATA_GENERIC_ENHANCE_UPDATE, /* + * strong check on range and segment + * bitmap for update case + */ META_GENERIC, }; @@ -271,7 +275,7 @@ enum { ORPHAN_INO, /* for orphan ino list */ APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ - TRANS_DIR_INO, /* for trasactions dir ino list */ + TRANS_DIR_INO, /* for transactions dir ino list */ FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. 
list */ }; @@ -779,6 +783,7 @@ struct f2fs_inode_info { unsigned int clevel; /* maximum level of given file name */ struct task_struct *task; /* lookup and create consistency */ struct task_struct *cp_task; /* separate cp/wb IO stats*/ + struct task_struct *wb_task; /* indicate inode is in context of writeback */ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ spinlock_t i_size_lock; /* protect last_disk_size */ @@ -1155,7 +1160,10 @@ enum iostat_type { APP_BUFFERED_IO, /* app buffered write IOs */ APP_WRITE_IO, /* app write IOs */ APP_MAPPED_IO, /* app mapped IOs */ + APP_BUFFERED_CDATA_IO, /* app buffered write IOs on compressed file */ + APP_MAPPED_CDATA_IO, /* app mapped write IOs on compressed file */ FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ + FS_CDATA_IO, /* data IOs from kworker/fsync/reclaimer on compressed file */ FS_NODE_IO, /* node IOs from kworker/fsync/reclaimer */ FS_META_IO, /* meta IOs from kworker/reclaimer */ FS_GC_DATA_IO, /* data IOs from forground gc */ @@ -1169,6 +1177,8 @@ enum iostat_type { APP_BUFFERED_READ_IO, /* app buffered read IOs */ APP_READ_IO, /* app read IOs */ APP_MAPPED_READ_IO, /* app mapped read IOs */ + APP_BUFFERED_CDATA_READ_IO, /* app buffered read IOs on compressed file */ + APP_MAPPED_CDATA_READ_IO, /* app mapped read IOs on compressed file */ FS_DATA_READ_IO, /* data read IOs */ FS_GDATA_READ_IO, /* data read IOs from background gc */ FS_CDATA_READ_IO, /* compressed data read IOs */ @@ -1244,7 +1254,6 @@ enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ DIRTY_META, /* for all dirtied inode metadata */ - ATOMIC_FILE, /* for all atomic files */ NR_INODE_TYPE, }; @@ -1723,11 +1732,9 @@ struct f2fs_sb_info { unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ spinlock_t gc_urgent_high_lock; - bool gc_urgent_high_limited; /* indicates 
having limited trial count */ unsigned int gc_urgent_high_remaining; /* remaining trial count for GC_URGENT_HIGH */ /* for skip statistic */ - unsigned int atomic_files; /* # of opened atomic file */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ /* threshold for gc trials on pinned files */ @@ -1758,6 +1765,8 @@ struct f2fs_sb_info { atomic_t inline_dir; /* # of inline_dentry inodes */ atomic_t compr_inode; /* # of compressed inodes */ atomic64_t compr_blocks; /* # of compressed blocks */ + atomic_t swapfile_inode; /* # of swapfile inodes */ + atomic_t atomic_files; /* # of opened atomic file */ atomic_t max_aw_cnt; /* max # of atomic writes */ unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ unsigned int other_skip_bggc; /* skip background gc for other reasons */ @@ -1803,6 +1812,10 @@ struct f2fs_sb_info { struct workqueue_struct *post_read_wq; /* post read workqueue */ + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */ + spinlock_t error_lock; /* protect errors array */ + bool error_dirty; /* errors of sb is dirty */ + struct kmem_cache *inline_xattr_slab; /* inline xattr entry */ unsigned int inline_xattr_slab_size; /* default inline xattr slab size */ @@ -2522,7 +2535,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) if (__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) - return &ckpt->sit_nat_version_bitmap; + return tmp_ptr; else return (unsigned char *)ckpt + F2FS_BLKSIZE; } else { @@ -3538,6 +3551,8 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); int f2fs_quota_sync(struct super_block *sb, int type); loff_t max_file_blocks(struct inode *inode); void f2fs_quota_off_umount(struct super_block *sb); +void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason); +void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); int 
f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); @@ -3697,7 +3712,9 @@ static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi) /* * checkpoint.c */ -void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); +void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, + unsigned char reason); +void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi); struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index); @@ -3727,7 +3744,8 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi); int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi); void f2fs_update_dirty_page(struct inode *inode, struct page *page); void f2fs_remove_dirty_inode(struct inode *inode); -int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); +int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type, + bool from_cp); void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type); u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi); int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); @@ -3854,7 +3872,7 @@ struct f2fs_stat_info { int nr_issued_ckpt, nr_total_ckpt, nr_queued_ckpt; unsigned int cur_ckpt_time, peak_ckpt_time; int inline_xattr, inline_inode, inline_dir, append, update, orphans; - int compr_inode; + int compr_inode, swapfile_inode; unsigned long long compr_blocks; int aw_cnt, max_aw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; @@ -3943,6 +3961,14 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) (atomic64_add(blocks, &F2FS_I_SB(inode)->compr_blocks)) #define stat_sub_compr_blocks(inode, blocks) \ (atomic64_sub(blocks, &F2FS_I_SB(inode)->compr_blocks)) +#define stat_inc_swapfile_inode(inode) \ + (atomic_inc(&F2FS_I_SB(inode)->swapfile_inode)) +#define 
stat_dec_swapfile_inode(inode) \ + (atomic_dec(&F2FS_I_SB(inode)->swapfile_inode)) +#define stat_inc_atomic_inode(inode) \ + (atomic_inc(&F2FS_I_SB(inode)->atomic_files)) +#define stat_dec_atomic_inode(inode) \ + (atomic_dec(&F2FS_I_SB(inode)->atomic_files)) #define stat_inc_meta_count(sbi, blkaddr) \ do { \ if (blkaddr < SIT_I(sbi)->sit_base_addr) \ @@ -3962,7 +3988,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) (atomic_inc(&(sbi)->inplace_count)) #define stat_update_max_atomic_write(inode) \ do { \ - int cur = F2FS_I_SB(inode)->atomic_files; \ + int cur = atomic_read(&F2FS_I_SB(inode)->atomic_files); \ int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \ if (cur > max) \ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ @@ -4027,6 +4053,10 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #define stat_dec_compr_inode(inode) do { } while (0) #define stat_add_compr_blocks(inode, blocks) do { } while (0) #define stat_sub_compr_blocks(inode, blocks) do { } while (0) +#define stat_inc_swapfile_inode(inode) do { } while (0) +#define stat_dec_swapfile_inode(inode) do { } while (0) +#define stat_inc_atomic_inode(inode) do { } while (0) +#define stat_dec_atomic_inode(inode) do { } while (0) #define stat_update_max_atomic_write(inode) do { } while (0) #define stat_inc_meta_count(sbi, blkaddr) do { } while (0) #define stat_inc_seg_type(sbi, curseg) do { } while (0) @@ -4468,17 +4498,6 @@ static inline void f2fs_i_compr_blocks_update(struct inode *inode, f2fs_mark_inode_dirty_sync(inode, true); } -static inline int block_unaligned_IO(struct inode *inode, - struct kiocb *iocb, struct iov_iter *iter) -{ - unsigned int i_blkbits = READ_ONCE(inode->i_blkbits); - unsigned int blocksize_mask = (1 << i_blkbits) - 1; - loff_t offset = iocb->ki_pos; - unsigned long align = offset | iov_iter_alignment(iter); - - return align & blocksize_mask; -} - static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi, int flag) { @@ -4489,35 
+4508,6 @@ static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi, return sbi->aligned_blksize; } -static inline bool f2fs_force_buffered_io(struct inode *inode, - struct kiocb *iocb, struct iov_iter *iter) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int rw = iov_iter_rw(iter); - - if (!fscrypt_dio_supported(iocb, iter)) - return true; - if (fsverity_active(inode)) - return true; - if (f2fs_compressed_file(inode)) - return true; - - /* disallow direct IO if any of devices has unaligned blksize */ - if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) - return true; - - if (f2fs_lfs_mode(sbi) && (rw == WRITE)) { - if (block_unaligned_IO(inode, iocb, iter)) - return true; - if (F2FS_IO_ALIGNED(sbi)) - return true; - } - if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) - return true; - - return false; -} - static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) { return fsverity_active(inode) && diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1b6974c836cf..d9884df91d26 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -43,8 +43,8 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) ret = filemap_fault(vmf); if (!ret) - f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO, - F2FS_BLKSIZE); + f2fs_update_iostat(F2FS_I_SB(inode), inode, + APP_MAPPED_READ_IO, F2FS_BLKSIZE); trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret); @@ -154,7 +154,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) if (!PageUptodate(page)) SetPageUptodate(page); - f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); f2fs_update_time(sbi, REQ_TIME); trace_f2fs_vm_page_mkwrite(page, DATA); @@ -809,6 +809,34 @@ int f2fs_truncate(struct inode *inode) return 0; } +static bool f2fs_force_buffered_io(struct inode *inode, int rw) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!fscrypt_dio_supported(inode)) + return true; + if 
(fsverity_active(inode)) + return true; + if (f2fs_compressed_file(inode)) + return true; + + /* disallow direct IO if any of devices has unaligned blksize */ + if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) + return true; + /* + * for blkzoned device, fallback direct IO to buffered IO, so + * all IOs can be serialized by log-structured write. + */ + if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE)) + return true; + if (f2fs_lfs_mode(sbi) && rw == WRITE && F2FS_IO_ALIGNED(sbi)) + return true; + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + return true; + + return false; +} + int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -1162,6 +1190,7 @@ next_dnode: !f2fs_is_valid_blkaddr(sbi, *blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; } @@ -1446,6 +1475,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, DATA_GENERIC_ENHANCE)) { ret = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); break; } @@ -2055,9 +2085,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) } f2fs_i_size_write(fi->cow_inode, i_size_read(inode)); - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - sbi->atomic_files++; - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + stat_inc_atomic_inode(inode); set_inode_flag(inode, FI_ATOMIC_FILE); set_inode_flag(fi->cow_inode, FI_COW_FILE); @@ -2151,7 +2179,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (ret) { if (ret == -EROFS) { ret = 0; - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); trace_f2fs_shutdown(sbi, in, ret); } @@ -2164,7 +2193,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) ret = freeze_bdev(sb->s_bdev); if (ret) goto out; - 
f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); thaw_bdev(sb->s_bdev); break; @@ -2173,16 +2202,16 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) ret = f2fs_sync_fs(sb, 1); if (ret) goto out; - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_METAFLUSH: f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NEED_FSCK: @@ -3329,8 +3358,10 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (!__is_valid_data_blkaddr(blkaddr)) continue; if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE))) + DATA_GENERIC_ENHANCE))) { + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; + } } while (count) { @@ -3491,8 +3522,10 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (!__is_valid_data_blkaddr(blkaddr)) continue; if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE))) + DATA_GENERIC_ENHANCE))) { + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; + } } while (count) { @@ -3764,6 +3797,8 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) DATA_GENERIC_ENHANCE)) { ret = -EFSCORRUPTED; f2fs_put_dnode(&dn); + f2fs_handle_error(sbi, + ERROR_INVALID_BLKADDR); goto out; } @@ -4190,7 +4225,7 @@ static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, if (!(iocb->ki_flags & IOCB_DIRECT)) return false; - if (f2fs_force_buffered_io(inode, iocb, iter)) + if 
(f2fs_force_buffered_io(inode, iov_iter_rw(iter))) return false; /* @@ -4220,7 +4255,7 @@ static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, dec_page_count(sbi, F2FS_DIO_READ); if (error) return error; - f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size); + f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size); return 0; } @@ -4309,7 +4344,8 @@ skip_read_trace: } else { ret = filemap_read(iocb, to, 0); if (ret > 0) - f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret); + f2fs_update_iostat(F2FS_I_SB(inode), inode, + APP_BUFFERED_READ_IO, ret); } if (trace_f2fs_dataread_end_enabled()) trace_f2fs_dataread_end(inode, pos, ret); @@ -4426,7 +4462,8 @@ static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, if (ret > 0) { iocb->ki_pos += ret; - f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret); + f2fs_update_iostat(F2FS_I_SB(inode), inode, + APP_BUFFERED_IO, ret); } return ret; } @@ -4439,7 +4476,7 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, dec_page_count(sbi, F2FS_DIO_WRITE); if (error) return error; - f2fs_update_iostat(sbi, APP_DIRECT_IO, size); + f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size); return 0; } @@ -4626,7 +4663,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) skip_write_trace: /* Do the actual write. */ ret = dio ? 
- f2fs_dio_write_iter(iocb, from, &may_need_sync): + f2fs_dio_write_iter(iocb, from, &may_need_sync) : f2fs_buffered_write_iter(iocb, from); if (trace_f2fs_datawrite_end_enabled()) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a5c0c835c99b..527e68c6e027 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -74,7 +74,8 @@ static int gc_thread_func(void *data) if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_FAULT_INJECT); } if (!sb_start_write_trylock(sbi->sb)) { @@ -97,14 +98,10 @@ static int gc_thread_func(void *data) */ if (sbi->gc_mode == GC_URGENT_HIGH) { spin_lock(&sbi->gc_urgent_high_lock); - if (sbi->gc_urgent_high_limited) { - if (!sbi->gc_urgent_high_remaining) { - sbi->gc_urgent_high_limited = false; - spin_unlock(&sbi->gc_urgent_high_lock); - sbi->gc_mode = GC_NORMAL; - continue; - } + if (sbi->gc_urgent_high_remaining) { sbi->gc_urgent_high_remaining--; + if (!sbi->gc_urgent_high_remaining) + sbi->gc_mode = GC_NORMAL; } spin_unlock(&sbi->gc_urgent_high_lock); } @@ -1082,7 +1079,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, { struct page *node_page; nid_t nid; - unsigned int ofs_in_node; + unsigned int ofs_in_node, max_addrs; block_t source_blkaddr; nid = le32_to_cpu(sum->nid); @@ -1108,6 +1105,14 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } + max_addrs = IS_INODE(node_page) ? 
DEF_ADDRS_PER_INODE : + DEF_ADDRS_PER_BLOCK; + if (ofs_in_node >= max_addrs) { + f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u", + ofs_in_node, dni->ino, dni->nid, max_addrs); + return false; + } + *nofs = ofs_of_node(node_page); source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); @@ -1159,6 +1164,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto put_page; } goto got_it; @@ -1177,6 +1183,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE))) { err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto put_page; } got_it: @@ -1206,8 +1213,8 @@ got_it: f2fs_put_page(fio.encrypted_page, 0); f2fs_put_page(page, 1); - f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); - f2fs_update_iostat(sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, NULL, FS_GDATA_READ_IO, F2FS_BLKSIZE); return 0; put_encrypted_page: @@ -1307,8 +1314,10 @@ static int move_data_block(struct inode *inode, block_t bidx, goto up_out; } - f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); - f2fs_update_iostat(fio.sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(fio.sbi, inode, FS_DATA_READ_IO, + F2FS_BLKSIZE); + f2fs_update_iostat(fio.sbi, NULL, FS_GDATA_READ_IO, + F2FS_BLKSIZE); lock_page(mpage); if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || @@ -1360,7 +1369,7 @@ static int move_data_block(struct inode *inode, block_t bidx, goto put_page_out; } - f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE); + f2fs_update_iostat(fio.sbi, NULL, FS_GC_DATA_IO, F2FS_BLKSIZE); f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, 
FI_APPEND_WRITE); @@ -1707,7 +1716,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_CORRUPTED_SUMMARY); goto skip; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index bf46a7dfbea2..21a495234ffd 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -64,7 +64,6 @@ bool f2fs_may_inline_dentry(struct inode *inode) void f2fs_do_read_inline_data(struct page *page, struct page *ipage) { struct inode *inode = page->mapping->host; - void *src_addr, *dst_addr; if (PageUptodate(page)) return; @@ -74,11 +73,8 @@ void f2fs_do_read_inline_data(struct page *page, struct page *ipage) zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE); /* Copy the whole inline data block */ - src_addr = inline_data_addr(inode, ipage); - dst_addr = kmap_atomic(page); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); - flush_dcache_page(page); - kunmap_atomic(dst_addr); + memcpy_to_page(page, 0, inline_data_addr(inode, ipage), + MAX_INLINE_DATA(inode)); if (!PageUptodate(page)) SetPageUptodate(page); } @@ -164,6 +160,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_sbi_flag(fio.sbi, SBI_NEED_FSCK); f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", __func__, dn->inode->i_ino, dn->data_blkaddr); + f2fs_handle_error(fio.sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; } @@ -246,7 +243,6 @@ out: int f2fs_write_inline_data(struct inode *inode, struct page *page) { - void *src_addr, *dst_addr; struct dnode_of_data dn; int err; @@ -263,10 +259,8 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) f2fs_bug_on(F2FS_I_SB(inode), page->index); f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); - src_addr = kmap_atomic(page); - dst_addr = 
inline_data_addr(inode, dn.inode_page); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); - kunmap_atomic(src_addr); + memcpy_from_page(inline_data_addr(inode, dn.inode_page), + page, 0, MAX_INLINE_DATA(inode)); set_page_dirty(dn.inode_page); f2fs_clear_page_cache_dirty_tag(page); @@ -419,6 +413,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", __func__, dir->i_ino, dn.data_blkaddr); + f2fs_handle_error(F2FS_P_SB(page), ERROR_INVALID_BLKADDR); err = -EFSCORRUPTED; goto out; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 94239ffadced..1f89ec5707df 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -81,8 +81,10 @@ static int __written_first_block(struct f2fs_sb_info *sbi, if (!__is_valid_data_blkaddr(addr)) return 1; - if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) + if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) { + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; + } return 0; } @@ -333,6 +335,16 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return true; } +static void init_idisk_time(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + fi->i_disk_time[0] = inode->i_atime; + fi->i_disk_time[1] = inode->i_ctime; + fi->i_disk_time[2] = inode->i_mtime; + fi->i_disk_time[3] = fi->i_crtime; +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -405,6 +417,7 @@ static int do_read_inode(struct inode *inode) if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); + f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; } @@ -465,10 +478,7 @@ static int do_read_inode(struct inode *inode) } } - fi->i_disk_time[0] = inode->i_atime; - fi->i_disk_time[1] = inode->i_ctime; - fi->i_disk_time[2] = inode->i_mtime; - 
fi->i_disk_time[3] = fi->i_crtime; + init_idisk_time(inode); f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -480,6 +490,12 @@ static int do_read_inode(struct inode *inode) return 0; } +static bool is_meta_ino(struct f2fs_sb_info *sbi, unsigned int ino) +{ + return ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi) || + ino == F2FS_COMPRESS_INO(sbi); +} + struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -491,16 +507,22 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) { + if (is_meta_ino(sbi, ino)) { + f2fs_err(sbi, "inaccessible inode: %lu, run fsck to repair", ino); + set_sbi_flag(sbi, SBI_NEED_FSCK); + ret = -EFSCORRUPTED; + trace_f2fs_iget_exit(inode, ret); + iput(inode); + f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); + return ERR_PTR(ret); + } + trace_f2fs_iget(inode); return inode; } - if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi)) - goto make_now; -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (ino == F2FS_COMPRESS_INO(sbi)) + if (is_meta_ino(sbi, ino)) goto make_now; -#endif ret = do_read_inode(inode); if (ret) @@ -676,11 +698,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) if (inode->i_nlink == 0) clear_page_private_inline(node_page); - F2FS_I(inode)->i_disk_time[0] = inode->i_atime; - F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; - F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; - F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; - + init_idisk_time(inode); #ifdef CONFIG_F2FS_CHECK_FS f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page); #endif @@ -699,7 +717,8 @@ retry: cond_resched(); goto retry; } else if (err != -ENOENT) { - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_UPDATE_INODE); } return; } diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c index d84c5f6cc09d..3166a8939ed4 100644 --- a/fs/f2fs/iostat.c +++ 
b/fs/f2fs/iostat.c @@ -31,55 +31,65 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) /* print app write IOs */ seq_puts(seq, "[WRITE]\n"); - seq_printf(seq, "app buffered: %-16llu\n", + seq_printf(seq, "app buffered data: %-16llu\n", sbi->rw_iostat[APP_BUFFERED_IO]); - seq_printf(seq, "app direct: %-16llu\n", + seq_printf(seq, "app direct data: %-16llu\n", sbi->rw_iostat[APP_DIRECT_IO]); - seq_printf(seq, "app mapped: %-16llu\n", + seq_printf(seq, "app mapped data: %-16llu\n", sbi->rw_iostat[APP_MAPPED_IO]); + seq_printf(seq, "app buffered cdata: %-16llu\n", + sbi->rw_iostat[APP_BUFFERED_CDATA_IO]); + seq_printf(seq, "app mapped cdata: %-16llu\n", + sbi->rw_iostat[APP_MAPPED_CDATA_IO]); /* print fs write IOs */ - seq_printf(seq, "fs data: %-16llu\n", + seq_printf(seq, "fs data: %-16llu\n", sbi->rw_iostat[FS_DATA_IO]); - seq_printf(seq, "fs node: %-16llu\n", + seq_printf(seq, "fs cdata: %-16llu\n", + sbi->rw_iostat[FS_CDATA_IO]); + seq_printf(seq, "fs node: %-16llu\n", sbi->rw_iostat[FS_NODE_IO]); - seq_printf(seq, "fs meta: %-16llu\n", + seq_printf(seq, "fs meta: %-16llu\n", sbi->rw_iostat[FS_META_IO]); - seq_printf(seq, "fs gc data: %-16llu\n", + seq_printf(seq, "fs gc data: %-16llu\n", sbi->rw_iostat[FS_GC_DATA_IO]); - seq_printf(seq, "fs gc node: %-16llu\n", + seq_printf(seq, "fs gc node: %-16llu\n", sbi->rw_iostat[FS_GC_NODE_IO]); - seq_printf(seq, "fs cp data: %-16llu\n", + seq_printf(seq, "fs cp data: %-16llu\n", sbi->rw_iostat[FS_CP_DATA_IO]); - seq_printf(seq, "fs cp node: %-16llu\n", + seq_printf(seq, "fs cp node: %-16llu\n", sbi->rw_iostat[FS_CP_NODE_IO]); - seq_printf(seq, "fs cp meta: %-16llu\n", + seq_printf(seq, "fs cp meta: %-16llu\n", sbi->rw_iostat[FS_CP_META_IO]); /* print app read IOs */ seq_puts(seq, "[READ]\n"); - seq_printf(seq, "app buffered: %-16llu\n", + seq_printf(seq, "app buffered data: %-16llu\n", sbi->rw_iostat[APP_BUFFERED_READ_IO]); - seq_printf(seq, "app direct: %-16llu\n", + seq_printf(seq, "app direct 
data: %-16llu\n", sbi->rw_iostat[APP_DIRECT_READ_IO]); - seq_printf(seq, "app mapped: %-16llu\n", + seq_printf(seq, "app mapped data: %-16llu\n", sbi->rw_iostat[APP_MAPPED_READ_IO]); + seq_printf(seq, "app buffered cdata: %-16llu\n", + sbi->rw_iostat[APP_BUFFERED_CDATA_READ_IO]); + seq_printf(seq, "app mapped cdata: %-16llu\n", + sbi->rw_iostat[APP_MAPPED_CDATA_READ_IO]); /* print fs read IOs */ - seq_printf(seq, "fs data: %-16llu\n", + seq_printf(seq, "fs data: %-16llu\n", sbi->rw_iostat[FS_DATA_READ_IO]); - seq_printf(seq, "fs gc data: %-16llu\n", + seq_printf(seq, "fs gc data: %-16llu\n", sbi->rw_iostat[FS_GDATA_READ_IO]); - seq_printf(seq, "fs compr_data: %-16llu\n", + seq_printf(seq, "fs cdata: %-16llu\n", sbi->rw_iostat[FS_CDATA_READ_IO]); - seq_printf(seq, "fs node: %-16llu\n", + seq_printf(seq, "fs node: %-16llu\n", sbi->rw_iostat[FS_NODE_READ_IO]); - seq_printf(seq, "fs meta: %-16llu\n", + seq_printf(seq, "fs meta: %-16llu\n", sbi->rw_iostat[FS_META_READ_IO]); /* print other IOs */ seq_puts(seq, "[OTHER]\n"); - seq_printf(seq, "fs discard: %-16llu\n", + seq_printf(seq, "fs discard: %-16llu\n", sbi->rw_iostat[FS_DISCARD]); return 0; @@ -159,7 +169,7 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi) spin_unlock_irq(&sbi->iostat_lat_lock); } -void f2fs_update_iostat(struct f2fs_sb_info *sbi, +void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode, enum iostat_type type, unsigned long long io_bytes) { unsigned long flags; @@ -176,6 +186,28 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi, if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO) sbi->rw_iostat[APP_READ_IO] += io_bytes; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (inode && f2fs_compressed_file(inode)) { + if (type == APP_BUFFERED_IO) + sbi->rw_iostat[APP_BUFFERED_CDATA_IO] += io_bytes; + + if (type == APP_BUFFERED_READ_IO) + sbi->rw_iostat[APP_BUFFERED_CDATA_READ_IO] += io_bytes; + + if (type == APP_MAPPED_READ_IO) + sbi->rw_iostat[APP_MAPPED_CDATA_READ_IO] += io_bytes; + 
+ if (type == APP_MAPPED_IO) + sbi->rw_iostat[APP_MAPPED_CDATA_IO] += io_bytes; + + if (type == FS_DATA_READ_IO) + sbi->rw_iostat[FS_CDATA_READ_IO] += io_bytes; + + if (type == FS_DATA_IO) + sbi->rw_iostat[FS_CDATA_IO] += io_bytes; + } +#endif + spin_unlock_irqrestore(&sbi->iostat_lock, flags); f2fs_record_iostat(sbi); diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h index 22a2d01f57ef..2c048307b6e0 100644 --- a/fs/f2fs/iostat.h +++ b/fs/f2fs/iostat.h @@ -31,7 +31,7 @@ struct iostat_lat_info { extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset); extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi); -extern void f2fs_update_iostat(struct f2fs_sb_info *sbi, +extern void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode, enum iostat_type type, unsigned long long io_bytes); struct bio_iostat_ctx { @@ -65,7 +65,7 @@ extern void f2fs_destroy_iostat_processing(void); extern int f2fs_init_iostat(struct f2fs_sb_info *sbi); extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi); #else -static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, +static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode, enum iostat_type type, unsigned long long io_bytes) {} static inline void iostat_update_and_unbind_ctx(struct bio *bio, int rw) {} static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 631c5c9db74f..eb654498908d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -36,6 +36,7 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", __func__, nid); + f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; } return 0; @@ -585,7 +586,7 @@ retry: ne = nat_in_journal(journal, i); node_info_from_raw_nat(ni, &ne); } - up_read(&curseg->journal_rwsem); + up_read(&curseg->journal_rwsem); if (i >= 0) { 
f2fs_up_read(&nm_i->nat_tree_lock); goto cache; @@ -1295,6 +1296,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) if (unlikely(new_ni.blk_addr != NULL_ADDR)) { err = -EFSCORRUPTED; set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto fail; } #endif @@ -1369,7 +1371,7 @@ static int read_node_page(struct page *page, int op_flags) err = f2fs_submit_page_bio(&fio); if (!err) - f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, NULL, FS_NODE_READ_IO, F2FS_BLKSIZE); return err; } @@ -2146,8 +2148,7 @@ static int f2fs_set_node_page_dirty(struct page *page) if (IS_INODE(page)) f2fs_inode_chksum_set(F2FS_P_SB(page), page); #endif - if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + if (__set_page_dirty_nobuffers(page)) { inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); set_page_private_reference(page); return 1; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 329ae992ee0e..4e9e4567c9e7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -473,7 +473,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, struct dnode_of_data tdn = *dn; nid_t ino, nid; struct inode *inode; - unsigned int offset; + unsigned int offset, ofs_in_node, max_addrs; block_t bidx; int i; @@ -500,15 +500,25 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, got_it: /* Use the locked dnode page and inode */ nid = le32_to_cpu(sum.nid); + ofs_in_node = le16_to_cpu(sum.ofs_in_node); + + max_addrs = ADDRS_PER_PAGE(dn->node_page, dn->inode); + if (ofs_in_node >= max_addrs) { + f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u", + ofs_in_node, dn->inode->i_ino, nid, max_addrs); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUMMARY); + return -EFSCORRUPTED; + } + if (dn->inode->i_ino == nid) { tdn.nid = nid; if (!dn->inode_page_locked) lock_page(dn->inode_page); tdn.node_page = dn->inode_page; - tdn.ofs_in_node = 
le16_to_cpu(sum.ofs_in_node); + tdn.ofs_in_node = ofs_in_node; goto truncate_out; } else if (dn->nid == nid) { - tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + tdn.ofs_in_node = ofs_in_node; goto truncate_out; } @@ -627,6 +637,7 @@ retry_dn: inode->i_ino, ofs_of_node(dn.node_page), ofs_of_node(page)); err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); goto err; } @@ -639,12 +650,14 @@ retry_dn: if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto err; } if (__is_valid_data_blkaddr(dest) && !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto err; } @@ -698,6 +711,16 @@ retry_prev: goto err; } + if (f2fs_is_valid_blkaddr(sbi, dest, + DATA_GENERIC_ENHANCE_UPDATE)) { + f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u", + dest, inode->i_ino, dn.ofs_in_node); + err = -EFSCORRUPTED; + f2fs_handle_error(sbi, + ERROR_INVALID_BLKADDR); + goto err; + } + /* write dummy data page */ f2fs_replace_block(sbi, &dn, src, dest, ni.version, false, false); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f80439758c1c..fbb2a00e9ca4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -186,7 +186,6 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) void f2fs_abort_atomic_write(struct inode *inode, bool clean) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); if (!f2fs_is_atomic_file(inode)) @@ -199,10 +198,7 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) fi->cow_inode = NULL; release_atomic_write_cnt(inode); clear_inode_flag(inode, FI_ATOMIC_FILE); - - spin_lock(&sbi->inode_lock[ATOMIC_FILE]); - sbi->atomic_files--; - spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + stat_dec_atomic_inode(inode); } static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, @@ -311,6 +307,8 @@ static int 
__f2fs_commit_atomic_write(struct inode *inode) DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); ret = -EFSCORRUPTED; + f2fs_handle_error(sbi, + ERROR_INVALID_BLKADDR); goto out; } @@ -375,7 +373,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (time_to_inject(sbi, FAULT_CHECKPOINT)) { f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT); } /* balance_fs_bg is able to be pending */ @@ -475,12 +473,12 @@ do_sync: mutex_lock(&sbi->flush_lock); blk_start_plug(&plug); - f2fs_sync_dirty_inodes(sbi, FILE_INODE); + f2fs_sync_dirty_inodes(sbi, FILE_INODE, false); blk_finish_plug(&plug); mutex_unlock(&sbi->flush_lock); } - f2fs_sync_fs(sbi->sb, true); + f2fs_sync_fs(sbi->sb, 1); stat_inc_bg_cp_count(sbi->stat_info); } @@ -693,7 +691,8 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) } while (ret && --count); if (ret) { - f2fs_stop_checkpoint(sbi, false); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_FLUSH_FAIL); break; } @@ -1171,7 +1170,7 @@ submit: atomic_inc(&dcc->issued_discard); - f2fs_update_iostat(sbi, FS_DISCARD, 1); + f2fs_update_iostat(sbi, NULL, FS_DISCARD, 1); lstart += len; start += len; @@ -3389,7 +3388,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, f2fs_submit_page_write(&fio); stat_inc_meta_count(sbi, page->index); - f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE); } void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio) @@ -3399,7 +3398,7 @@ void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio) set_summary(&sum, nid, 0, 0); do_write_page(&sum, fio); - f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE); } void f2fs_outplace_write_data(struct dnode_of_data *dn, @@ -3413,7 +3412,7 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn, 
do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); - f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE); } int f2fs_inplace_write_data(struct f2fs_io_info *fio) @@ -3433,6 +3432,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", __func__, segno); err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE); goto drop_bio; } @@ -3454,7 +3454,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) if (!err) { f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); - f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + f2fs_update_iostat(fio->sbi, fio->page->mapping->host, + fio->io_type, F2FS_BLKSIZE); } return err; @@ -4380,6 +4381,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (se->type >= NR_PERSISTENT_LOG) { f2fs_err(sbi, "Invalid segment type: %u, segno: %u", se->type, start); + f2fs_handle_error(sbi, + ERROR_INCONSISTENT_SUM_TYPE); return -EFSCORRUPTED; } @@ -4416,6 +4419,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) f2fs_err(sbi, "Wrong journal entry on segno %u", start); err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL); break; } @@ -4435,6 +4439,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) f2fs_err(sbi, "Invalid segment type: %u, segno: %u", se->type, start); err = -EFSCORRUPTED; + f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE); break; } @@ -4466,6 +4471,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (sit_valid_blocks[NODE] != valid_node_count(sbi)) { f2fs_err(sbi, "SIT is corrupted node# %u vs %u", sit_valid_blocks[NODE], valid_node_count(sbi)); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT); return -EFSCORRUPTED; } @@ -4474,6 +4480,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u", sit_valid_blocks[DATA], 
sit_valid_blocks[NODE], valid_user_blocks(sbi)); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT); return -EFSCORRUPTED; } @@ -4624,6 +4631,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) f2fs_err(sbi, "Current segment has invalid alloc_type:%d", curseg->alloc_type); + f2fs_handle_error(sbi, ERROR_INVALID_CURSEG); return -EFSCORRUPTED; } @@ -4641,6 +4649,7 @@ out: "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u", i, curseg->segno, curseg->alloc_type, curseg->next_blkoff, blkofs); + f2fs_handle_error(sbi, ERROR_INVALID_CURSEG); return -EFSCORRUPTED; } } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index d1d63766f2c7..be8f2d7d007b 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -753,6 +753,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Mismatch valid blocks %d vs. %d", GET_SIT_VBLOCKS(raw_sit), valid_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_SIT); return -EFSCORRUPTED; } @@ -767,6 +768,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Wrong valid blocks %d or segno %u", GET_SIT_VBLOCKS(raw_sit), segno); set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_SIT); return -EFSCORRUPTED; } return 0; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 459e1c3d2208..0924c0208050 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -308,10 +308,10 @@ static void f2fs_destroy_casefold_cache(void) { } static inline void limit_reserve_root(struct f2fs_sb_info *sbi) { - block_t limit = min((sbi->user_block_count << 1) / 1000, + block_t limit = min((sbi->user_block_count >> 3), sbi->user_block_count - sbi->reserved_blocks); - /* limit is 0.2% */ + /* limit is 12.5% */ if (test_opt(sbi, RESERVE_ROOT) && F2FS_OPTION(sbi).root_reserved_blocks > limit) { F2FS_OPTION(sbi).root_reserved_blocks = limit; @@ -1349,6 +1349,11 @@ 
default_check: return -EINVAL; } + if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) { + f2fs_err(sbi, "LFS not compatible with ATGC"); + return -EINVAL; + } + if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_err(sbi, "Allow to mount readonly mode only"); return -EROFS; @@ -1669,9 +1674,8 @@ static int f2fs_freeze(struct super_block *sb) if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY)) return -EINVAL; - /* ensure no checkpoint required */ - if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list)) - return -EINVAL; + /* Let's flush checkpoints and stop the thread. */ + f2fs_flush_ckpt_thread(F2FS_SB(sb)); /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */ set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); @@ -2184,6 +2188,9 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) f2fs_up_write(&sbi->gc_lock); f2fs_sync_fs(sbi->sb, 1); + + /* Let's ensure there's no pending checkpoint anymore */ + f2fs_flush_ckpt_thread(sbi); } static int f2fs_remount(struct super_block *sb, int *flags, char *data) @@ -2349,6 +2356,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) f2fs_stop_ckpt_thread(sbi); need_restart_ckpt = true; } else { + /* Flush the previous checkpoint, if it exists.
*/ + f2fs_flush_ckpt_thread(sbi); + err = f2fs_start_ckpt_thread(sbi); if (err) { f2fs_err(sbi, @@ -2468,7 +2478,6 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, size_t toread; loff_t i_size = i_size_read(inode); struct page *page; - char *kaddr; if (off > i_size) return 0; @@ -2502,9 +2511,7 @@ repeat: return -EIO; } - kaddr = kmap_atomic(page); - memcpy(data, kaddr + offset, tocopy); - kunmap_atomic(kaddr); + memcpy_from_page(data, page, offset, tocopy); f2fs_put_page(page, 1); offset = 0; @@ -2526,7 +2533,6 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct page *page; void *fsdata = NULL; - char *kaddr; int err = 0; int tocopy; @@ -2545,10 +2551,7 @@ retry: break; } - kaddr = kmap_atomic(page); - memcpy(kaddr + offset, data, tocopy); - kunmap_atomic(kaddr); - flush_dcache_page(page); + memcpy_to_page(page, offset, data, tocopy); a_ops->write_end(NULL, mapping, off, tocopy, tocopy, page, fsdata); @@ -3043,23 +3046,24 @@ static void f2fs_get_ino_and_lblk_bits(struct super_block *sb, *lblk_bits_ret = 8 * sizeof(block_t); } -static int f2fs_get_num_devices(struct super_block *sb) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - - if (f2fs_is_multi_device(sbi)) - return sbi->s_ndevs; - return 1; -} - -static void f2fs_get_devices(struct super_block *sb, - struct request_queue **devs) +static struct block_device **f2fs_get_devices(struct super_block *sb, + unsigned int *num_devs) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct block_device **devs; int i; + if (!f2fs_is_multi_device(sbi)) + return NULL; + + devs = kmalloc_array(sbi->s_ndevs, sizeof(*devs), GFP_KERNEL); + if (!devs) + return ERR_PTR(-ENOMEM); + for (i = 0; i < sbi->s_ndevs; i++) - devs[i] = bdev_get_queue(FDEV(i).bdev); + devs[i] = FDEV(i).bdev; + *num_devs = sbi->s_ndevs; + return devs; } static const struct fscrypt_operations f2fs_cryptops = { @@ -3070,7 +3074,6 @@ static const struct fscrypt_operations f2fs_cryptops = { 
.empty_dir = f2fs_empty_dir, .has_stable_inodes = f2fs_has_stable_inodes, .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, - .get_num_devices = f2fs_get_num_devices, .get_devices = f2fs_get_devices, }; #endif @@ -3847,6 +3850,68 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return err; } +void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason) +{ + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + int err; + + f2fs_down_write(&sbi->sb_lock); + + if (raw_super->s_stop_reason[reason] < ((1 << BITS_PER_BYTE) - 1)) + raw_super->s_stop_reason[reason]++; + + err = f2fs_commit_super(sbi, false); + if (err) + f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d", + reason, err); + f2fs_up_write(&sbi->sb_lock); +} + +static void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag) +{ + spin_lock(&sbi->error_lock); + if (!test_bit(flag, (unsigned long *)sbi->errors)) { + set_bit(flag, (unsigned long *)sbi->errors); + sbi->error_dirty = true; + } + spin_unlock(&sbi->error_lock); +} + +static bool f2fs_update_errors(struct f2fs_sb_info *sbi) +{ + bool need_update = false; + + spin_lock(&sbi->error_lock); + if (sbi->error_dirty) { + memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors, + MAX_F2FS_ERRORS); + sbi->error_dirty = false; + need_update = true; + } + spin_unlock(&sbi->error_lock); + + return need_update; +} + +void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error) +{ + int err; + + f2fs_save_errors(sbi, error); + + f2fs_down_write(&sbi->sb_lock); + + if (!f2fs_update_errors(sbi)) + goto out_unlock; + + err = f2fs_commit_super(sbi, false); + if (err) + f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d", + error, err); +out_unlock: + f2fs_up_write(&sbi->sb_lock); +} + static int f2fs_scan_devices(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); @@ -4187,6 +4252,9 @@ try_onemore: goto free_devices; } + spin_lock_init(&sbi->error_lock); + 
memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS); + sbi->total_valid_node_count = le32_to_cpu(sbi->ckpt->valid_node_count); percpu_counter_set(&sbi->total_valid_inode_count, diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 3183c44d44c5..f03254dcd7bf 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -128,6 +128,12 @@ static ssize_t sb_status_show(struct f2fs_attr *a, return sprintf(buf, "%lx\n", sbi->s_flag); } +static ssize_t cp_status_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sprintf(buf, "%x\n", le32_to_cpu(F2FS_CKPT(sbi)->ckpt_flags)); +} + static ssize_t pending_discard_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -528,7 +534,6 @@ out: if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) { spin_lock(&sbi->gc_urgent_high_lock); - sbi->gc_urgent_high_limited = t != 0; sbi->gc_urgent_high_remaining = t; spin_unlock(&sbi->gc_urgent_high_lock); @@ -1031,8 +1036,10 @@ static struct attribute *f2fs_feat_attrs[] = { ATTRIBUTE_GROUPS(f2fs_feat); F2FS_GENERAL_RO_ATTR(sb_status); +F2FS_GENERAL_RO_ATTR(cp_status); static struct attribute *f2fs_stat_attrs[] = { ATTR_LIST(sb_status), + ATTR_LIST(cp_status), NULL, }; ATTRIBUTE_GROUPS(f2fs_stat); diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 5ac7e756a1bb..95bec0854cd2 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -47,16 +47,13 @@ static int pagecache_read(struct inode *inode, void *buf, size_t count, size_t n = min_t(size_t, count, PAGE_SIZE - offset_in_page(pos)); struct page *page; - void *addr; page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, NULL); if (IS_ERR(page)) return PTR_ERR(page); - addr = kmap_atomic(page); - memcpy(buf, addr + offset_in_page(pos), n); - kunmap_atomic(addr); + memcpy_from_page(buf, page, offset_in_page(pos), n); put_page(page); @@ -82,7 +79,6 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, PAGE_SIZE - offset_in_page(pos)); struct page *page; void *fsdata; - 
void *addr; int res; res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, @@ -90,9 +86,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, if (res) return res; - addr = kmap_atomic(page); - memcpy(addr + offset_in_page(pos), buf, n); - kunmap_atomic(addr); + memcpy_to_page(page, offset_in_page(pos), buf, n); res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, page, fsdata); @@ -245,6 +239,8 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes || pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) { f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); + f2fs_handle_error(F2FS_I_SB(inode), + ERROR_CORRUPTED_VERITY_XATTR); return -EFSCORRUPTED; } if (buf_size) { diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index c76c15086e5f..dc2e8637189e 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -367,6 +367,8 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); err = -EFSCORRUPTED; + f2fs_handle_error(F2FS_I_SB(inode), + ERROR_CORRUPTED_XATTR); goto out; } check: @@ -583,6 +585,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); error = -EFSCORRUPTED; + f2fs_handle_error(F2FS_I_SB(inode), + ERROR_CORRUPTED_XATTR); goto cleanup; } @@ -658,6 +662,8 @@ static int __f2fs_setxattr(struct inode *inode, int index, inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); error = -EFSCORRUPTED; + f2fs_handle_error(F2FS_I_SB(inode), + ERROR_CORRUPTED_XATTR); goto exit; } @@ -684,6 +690,8 @@ static int __f2fs_setxattr(struct inode *inode, int index, inode->i_ino, ENTRY_SIZE(last)); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); error = -EFSCORRUPTED; + f2fs_handle_error(F2FS_I_SB(inode), + ERROR_CORRUPTED_XATTR); goto exit; } last = XATTR_NEXT_ENTRY(last); diff --git 
a/fs/super.c b/fs/super.c index 039464ae725a..51e830b6596b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -293,7 +293,6 @@ static void __put_super(struct super_block *s) WARN_ON(s->s_inode_lru.node); WARN_ON(!list_empty(&s->s_mounts)); security_sb_free(s); - fscrypt_sb_free(s); put_user_ns(s->s_user_ns); kfree(s->s_subtype); call_rcu(&s->rcu, destroy_super_rcu); @@ -454,6 +453,7 @@ void generic_shutdown_super(struct super_block *sb) evict_inodes(sb); /* only nonzero refcount inodes can have marks */ fsnotify_sb_delete(sb); + fscrypt_sb_delete(sb); security_sb_delete(sb); if (sb->s_dio_done_wq) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d445150c5350..ee0d75d9a302 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -73,6 +73,42 @@ struct f2fs_device { __le32 total_segments; } __packed; +/* reason of stop_checkpoint */ +enum stop_cp_reason { + STOP_CP_REASON_SHUTDOWN, + STOP_CP_REASON_FAULT_INJECT, + STOP_CP_REASON_META_PAGE, + STOP_CP_REASON_WRITE_FAIL, + STOP_CP_REASON_CORRUPTED_SUMMARY, + STOP_CP_REASON_UPDATE_INODE, + STOP_CP_REASON_FLUSH_FAIL, + STOP_CP_REASON_MAX, +}; + +#define MAX_STOP_REASON 32 + +/* detail reason for EFSCORRUPTED */ +enum f2fs_error { + ERROR_CORRUPTED_CLUSTER, + ERROR_FAIL_DECOMPRESSION, + ERROR_INVALID_BLKADDR, + ERROR_CORRUPTED_DIRENT, + ERROR_CORRUPTED_INODE, + ERROR_INCONSISTENT_SUMMARY, + ERROR_INCONSISTENT_FOOTER, + ERROR_INCONSISTENT_SUM_TYPE, + ERROR_CORRUPTED_JOURNAL, + ERROR_INCONSISTENT_NODE_COUNT, + ERROR_INCONSISTENT_BLOCK_COUNT, + ERROR_INVALID_CURSEG, + ERROR_INCONSISTENT_SIT, + ERROR_CORRUPTED_VERITY_XATTR, + ERROR_CORRUPTED_XATTR, + ERROR_MAX, +}; + +#define MAX_F2FS_ERRORS 16 + struct f2fs_super_block { __le32 magic; /* Magic Number */ __le16 major_ver; /* Major Version */ @@ -116,7 +152,9 @@ struct f2fs_super_block { __u8 hot_ext_count; /* # of hot file extension */ __le16 s_encoding; /* Filename charset encoding */ __le16 s_encoding_flags; /* Filename charset encoding flags */ - 
__u8 reserved[306]; /* valid reserved region */ + __u8 s_stop_reason[MAX_STOP_REASON]; /* stop checkpoint reason */ + __u8 s_errors[MAX_F2FS_ERRORS]; /* reason of image corrupts */ + __u8 reserved[258]; /* valid reserved region */ __le32 crc; /* checksum of superblock */ } __packed; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3dd3632de00b..98209a06ff13 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1514,7 +1514,7 @@ struct super_block { const struct xattr_handler **s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; - struct key *s_master_keys; /* master crypto keys in use */ + struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 281dd4b391c6..bca45a787f90 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -161,24 +161,21 @@ struct fscrypt_operations { int *ino_bits_ret, int *lblk_bits_ret); /* - * Return the number of block devices to which the filesystem may write - * encrypted file contents. + * Return an array of pointers to the block devices to which the + * filesystem may write encrypted file contents, NULL if the filesystem + * only has a single such block device, or an ERR_PTR() on error. + * + * On successful non-NULL return, *num_devs is set to the number of + * devices in the returned array. The caller must free the returned + * array using kfree(). * * If the filesystem can use multiple block devices (other than block * devices that aren't used for encrypted file contents, such as * external journal devices), and wants to support inline encryption, * then it must implement this function. Otherwise it's not needed. 
*/ - int (*get_num_devices)(struct super_block *sb); - - /* - * If ->get_num_devices() returns a value greater than 1, then this - * function is called to get the array of request_queues that the - * filesystem is using -- one per block device. (There may be duplicate - * entries in this array, as block devices can share a request_queue.) - */ - void (*get_devices)(struct super_block *sb, - struct request_queue **devs); + struct block_device **(*get_devices)(struct super_block *sb, + unsigned int *num_devs); ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); @@ -318,7 +315,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -void fscrypt_sb_free(struct super_block *sb); +void fscrypt_sb_delete(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int fscrypt_add_test_dummy_key(struct super_block *sb, const struct fscrypt_dummy_policy *dummy_policy); @@ -355,7 +352,7 @@ u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ -void fscrypt_decrypt_bio(struct bio *bio); +bool fscrypt_decrypt_bio(struct bio *bio); int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len); @@ -528,7 +525,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -static inline void fscrypt_sb_free(struct super_block *sb) +static inline void fscrypt_sb_delete(struct super_block *sb) { } @@ -648,8 +645,9 @@ static inline int fscrypt_d_revalidate(struct dentry *dentry, } /* bio.c */ -static inline void fscrypt_decrypt_bio(struct bio *bio) +static inline bool fscrypt_decrypt_bio(struct bio *bio) { + return true; } static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, @@ -770,7 +768,7 @@ bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct 
buffer_head *next_bh); -bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter); +bool fscrypt_dio_supported(struct inode *inode); u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks); @@ -803,11 +801,8 @@ static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, return true; } -static inline bool fscrypt_dio_supported(struct kiocb *iocb, - struct iov_iter *iter) +static inline bool fscrypt_dio_supported(struct inode *inode) { - const struct inode *inode = file_inode(iocb->ki_filp); - return !fscrypt_needs_contents_encryption(inode); } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 0edcd4e11638..3605ab3fd0f3 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1583,9 +1583,10 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, TRACE_EVENT(f2fs_update_extent_tree_range, TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr, - unsigned int len), + unsigned int len, + unsigned int c_len), - TP_ARGS(inode, pgofs, blkaddr, len), + TP_ARGS(inode, pgofs, blkaddr, len, c_len), TP_STRUCT__entry( __field(dev_t, dev) @@ -1593,6 +1594,7 @@ TRACE_EVENT(f2fs_update_extent_tree_range, __field(unsigned int, pgofs) __field(u32, blk) __field(unsigned int, len) + __field(unsigned int, c_len) ), TP_fast_assign( @@ -1601,14 +1603,17 @@ TRACE_EVENT(f2fs_update_extent_tree_range, __entry->pgofs = pgofs; __entry->blk = blkaddr; __entry->len = len; + __entry->c_len = c_len; ), TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " - "blkaddr = %u, len = %u", + "blkaddr = %u, len = %u, " + "c_len = %u", show_dev_ino(__entry), __entry->pgofs, __entry->blk, - __entry->len) + __entry->len, + __entry->c_len) ); TRACE_EVENT(f2fs_shrink_extent_tree, @@ -1828,7 +1833,10 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, app_bio) __field(unsigned long long, app_wio) __field(unsigned long long, app_mio) + __field(unsigned long long, app_bcdio) + __field(unsigned long long, app_mcdio) 
__field(unsigned long long, fs_dio) + __field(unsigned long long, fs_cdio) __field(unsigned long long, fs_nio) __field(unsigned long long, fs_mio) __field(unsigned long long, fs_gc_dio) @@ -1840,6 +1848,8 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, app_brio) __field(unsigned long long, app_rio) __field(unsigned long long, app_mrio) + __field(unsigned long long, app_bcrio) + __field(unsigned long long, app_mcrio) __field(unsigned long long, fs_drio) __field(unsigned long long, fs_gdrio) __field(unsigned long long, fs_cdrio) @@ -1854,7 +1864,10 @@ TRACE_EVENT(f2fs_iostat, __entry->app_bio = iostat[APP_BUFFERED_IO]; __entry->app_wio = iostat[APP_WRITE_IO]; __entry->app_mio = iostat[APP_MAPPED_IO]; + __entry->app_bcdio = iostat[APP_BUFFERED_CDATA_IO]; + __entry->app_mcdio = iostat[APP_MAPPED_CDATA_IO]; __entry->fs_dio = iostat[FS_DATA_IO]; + __entry->fs_cdio = iostat[FS_CDATA_IO]; __entry->fs_nio = iostat[FS_NODE_IO]; __entry->fs_mio = iostat[FS_META_IO]; __entry->fs_gc_dio = iostat[FS_GC_DATA_IO]; @@ -1866,6 +1879,8 @@ TRACE_EVENT(f2fs_iostat, __entry->app_brio = iostat[APP_BUFFERED_READ_IO]; __entry->app_rio = iostat[APP_READ_IO]; __entry->app_mrio = iostat[APP_MAPPED_READ_IO]; + __entry->app_bcrio = iostat[APP_BUFFERED_CDATA_READ_IO]; + __entry->app_mcrio = iostat[APP_MAPPED_CDATA_READ_IO]; __entry->fs_drio = iostat[FS_DATA_READ_IO]; __entry->fs_gdrio = iostat[FS_GDATA_READ_IO]; __entry->fs_cdrio = iostat[FS_CDATA_READ_IO]; @@ -1875,20 +1890,24 @@ TRACE_EVENT(f2fs_iostat, ), TP_printk("dev = (%d,%d), " - "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], " - "fs [data=%llu, node=%llu, meta=%llu, discard=%llu], " + "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu, " + "compr(buffered=%llu, mapped=%llu)], " + "fs [data=%llu, cdata=%llu, node=%llu, meta=%llu, discard=%llu], " "gc [data=%llu, node=%llu], " "cp [data=%llu, node=%llu, meta=%llu], " "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " - "fs [data=%llu, 
(gc_data=%llu, compr_data=%llu), " + "compr(buffered=%llu, mapped=%llu)], " + "fs [data=%llu, (gc_data=%llu, cdata=%llu), " "node=%llu, meta=%llu]", show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, - __entry->app_bio, __entry->app_mio, __entry->fs_dio, + __entry->app_bio, __entry->app_mio, __entry->app_bcdio, + __entry->app_mcdio, __entry->fs_dio, __entry->fs_cdio, __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, __entry->fs_cp_nio, __entry->fs_cp_mio, __entry->app_rio, __entry->app_drio, __entry->app_brio, - __entry->app_mrio, __entry->fs_drio, __entry->fs_gdrio, + __entry->app_mrio, __entry->app_bcrio, __entry->app_mcrio, + __entry->fs_drio, __entry->fs_gdrio, __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio) );