f2fs: catch up to v4.14-rc1

This is cherry-picked from upstrea-f2fs-stable-linux-4.4.y.

Changes include:

commit c7fd9e2b4a ("f2fs: hurry up to issue discard after io interruption")
commit 603dde3965 ("f2fs: fix to show correct discard_granularity in sysfs")
...
commit 565f0225f9 ("f2fs: factor out discard command info into discard_cmd_control")
commit c4cc29d19e ("f2fs: remove batched discard in f2fs_trim_fs")

Change-Id: Icd8a85ac0c19a8aa25cd2591a12b4e9b85bdf1c5
Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
This commit is contained in:
Jaegeuk Kim
2017-01-09 18:16:29 -08:00
committed by Jaegeuk Kim
parent b0fa18e1ca
commit 13f002354d
45 changed files with 7588 additions and 3479 deletions

View File

@@ -57,6 +57,15 @@ Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the issue rate of small discard commands.
What: /sys/fs/f2fs/<disk>/discard_granularity
Date: July 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
Description:
Controls discard granularity of inner discard thread, inner thread
will not issue discards with size that is smaller than granularity.
The unit size is one block, now only support configuring in range
of [1, 512].
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@@ -92,3 +101,47 @@ Date: October 2015
Contact: "Chao Yu" <chao2.yu@samsung.com>
Description:
Controls the count of nid pages to be readaheaded.
What: /sys/fs/f2fs/<disk>/dirty_nats_ratio
Date: January 2016
Contact: "Chao Yu" <chao2.yu@samsung.com>
Description:
Controls dirty nat entries ratio threshold, if current
ratio exceeds configured threshold, checkpoint will
be triggered for flushing dirty nat entries.
What: /sys/fs/f2fs/<disk>/lifetime_write_kbytes
Date: January 2016
Contact: "Shuoran Liu" <liushuoran@huawei.com>
Description:
Shows total written kbytes issued to disk.
What: /sys/fs/f2fs/<disk>/inject_rate
Date: May 2016
Contact: "Sheng Yong" <shengyong1@huawei.com>
Description:
Controls the injection rate.
What: /sys/fs/f2fs/<disk>/inject_type
Date: May 2016
Contact: "Sheng Yong" <shengyong1@huawei.com>
Description:
Controls the injection type.
What: /sys/fs/f2fs/<disk>/reserved_blocks
Date: June 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
Description:
Controls current reserved blocks in system.
What: /sys/fs/f2fs/<disk>/gc_urgent
Date: August 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Do background GC agressively
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
Date: August 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls sleep time of GC urgent mode

View File

@@ -125,6 +125,7 @@ active_logs=%u Support configuring the number of active logs. In the
disable_ext_identify Disable the extension list configured by mkfs, so f2fs
does not aware of cold files such as media files.
inline_xattr Enable the inline xattrs feature.
noinline_xattr Disable the inline xattrs feature.
inline_data Enable the inline data feature: New created small(<~3.4k)
files can be written into inode block.
inline_dentry Enable the inline dir feature: data in new created
@@ -159,6 +160,18 @@ mode=%s Control block allocation mode which supports "adaptive"
writes towards main area.
io_bits=%u Set the bit size of write IO requests. It should be set
with "mode=lfs".
usrquota Enable plain user disk quota accounting.
grpquota Enable plain group disk quota accounting.
prjquota Enable plain project quota accounting.
usrjquota=<file> Appoint specified file and type during mount, so that quota
grpjquota=<file> information can be properly updated during recovery flow,
prjjquota=<file> <quota file>: must be in root directory;
jqfmt=<quota type> <quota type>: [vfsold,vfsv0,vfsv1].
offusrjquota Turn off user journelled quota.
offgrpjquota Turn off group journelled quota.
offprjjquota Turn off project journelled quota.
quota Enable plain user disk quota accounting.
noquota Disable all plain disk quota option.
================================================================================
DEBUGFS ENTRIES
@@ -204,6 +217,15 @@ Files in /sys/fs/f2fs/<devname>
gc_idle = 1 will select the Cost Benefit approach
& setting gc_idle = 2 will select the greedy approach.
gc_urgent This parameter controls triggering background GCs
urgently or not. Setting gc_urgent = 0 [default]
makes back to default behavior, while if it is set
to 1, background thread starts to do GC by given
gc_urgent_sleep_time interval.
gc_urgent_sleep_time This parameter controls sleep time for gc_urgent.
500 ms is set by default. See above gc_urgent.
reclaim_segments This parameter controls the number of prefree
segments to be reclaimed. If the number of prefree
segments is larger than the number of segments

View File

@@ -1,3 +1,4 @@
obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o
fscrypto-y := crypto.o fname.o policy.o keyinfo.o
fscrypto-$(CONFIG_BLOCK) += bio.o

143
fs/crypto/bio.c Normal file
View File

@@ -0,0 +1,143 @@
/*
* This contains encryption functions for per-file encryption.
*
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility
*
* Written by Michael Halcrow, 2014.
*
* Filename encryption additions
* Uday Savagaonkar, 2014
* Encryption policy handling additions
* Ildar Muslukhov, 2014
* Add fscrypt_pullback_bio_page()
* Jaegeuk Kim, 2015.
*
* This has not yet undergone a rigorous security audit.
*
* The usage of AES-XTS should conform to recommendations in NIST
* Special Publication 800-38E and IEEE P1619/D16.
*/
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/namei.h>
#include "fscrypt_private.h"
/*
* Call fscrypt_decrypt_page on every single page, reusing the encryption
* context.
*/
static void completion_pages(struct work_struct *work)
{
struct fscrypt_ctx *ctx =
container_of(work, struct fscrypt_ctx, r.work);
struct bio *bio = ctx->r.bio;
struct bio_vec *bv;
int i;
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
int ret = fscrypt_decrypt_page(page->mapping->host, page,
PAGE_SIZE, 0, page->index);
if (ret) {
WARN_ON_ONCE(1);
SetPageError(page);
} else {
SetPageUptodate(page);
}
unlock_page(page);
}
fscrypt_release_ctx(ctx);
bio_put(bio);
}
void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
{
INIT_WORK(&ctx->r.work, completion_pages);
ctx->r.bio = bio;
queue_work(fscrypt_read_workqueue, &ctx->r.work);
}
EXPORT_SYMBOL(fscrypt_decrypt_bio_pages);
void fscrypt_pullback_bio_page(struct page **page, bool restore)
{
struct fscrypt_ctx *ctx;
struct page *bounce_page;
/* The bounce data pages are unmapped. */
if ((*page)->mapping)
return;
/* The bounce data page is unmapped. */
bounce_page = *page;
ctx = (struct fscrypt_ctx *)page_private(bounce_page);
/* restore control page */
*page = ctx->w.control_page;
if (restore)
fscrypt_restore_control_page(bounce_page);
}
EXPORT_SYMBOL(fscrypt_pullback_bio_page);
int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
struct fscrypt_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
int ret, err = 0;
BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
ctx = fscrypt_get_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT);
if (IS_ERR(ciphertext_page)) {
err = PTR_ERR(ciphertext_page);
goto errout;
}
while (len--) {
err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk,
ZERO_PAGE(0), ciphertext_page,
PAGE_SIZE, 0, GFP_NOFS);
if (err)
goto errout;
bio = bio_alloc(GFP_NOWAIT, 1);
if (!bio) {
err = -ENOMEM;
goto errout;
}
bio->bi_bdev = inode->i_sb->s_bdev;
bio->bi_iter.bi_sector =
pblk << (inode->i_sb->s_blocksize_bits - 9);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = bio_add_page(bio, ciphertext_page,
inode->i_sb->s_blocksize, 0);
if (ret != inode->i_sb->s_blocksize) {
/* should never happen! */
WARN_ON(1);
bio_put(bio);
err = -EIO;
goto errout;
}
err = submit_bio_wait(0, bio);
bio_put(bio);
if (err)
goto errout;
lblk++;
pblk++;
}
err = 0;
errout:
fscrypt_release_ctx(ctx);
return err;
}
EXPORT_SYMBOL(fscrypt_zeroout_range);

View File

@@ -24,10 +24,10 @@
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
#include <linux/bio.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/fscrypto.h>
#include <crypto/aes.h>
#include "fscrypt_private.h"
static unsigned int num_prealloc_crypto_pages = 32;
static unsigned int num_prealloc_crypto_ctxs = 128;
@@ -44,7 +44,7 @@ static mempool_t *fscrypt_bounce_page_pool = NULL;
static LIST_HEAD(fscrypt_free_ctxs);
static DEFINE_SPINLOCK(fscrypt_ctx_lock);
static struct workqueue_struct *fscrypt_read_workqueue;
struct workqueue_struct *fscrypt_read_workqueue;
static DEFINE_MUTEX(fscrypt_init_mutex);
static struct kmem_cache *fscrypt_ctx_cachep;
@@ -63,7 +63,7 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
{
unsigned long flags;
if (ctx->flags & FS_WRITE_PATH_FL && ctx->w.bounce_page) {
if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) {
mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool);
ctx->w.bounce_page = NULL;
}
@@ -88,7 +88,7 @@ EXPORT_SYMBOL(fscrypt_release_ctx);
* Return: An allocated and initialized encryption context on success; error
* value or NULL otherwise.
*/
struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags)
struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags)
{
struct fscrypt_ctx *ctx = NULL;
struct fscrypt_info *ci = inode->i_crypt_info;
@@ -121,7 +121,7 @@ struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags)
} else {
ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
}
ctx->flags &= ~FS_WRITE_PATH_FL;
ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL;
return ctx;
}
EXPORT_SYMBOL(fscrypt_get_ctx);
@@ -141,20 +141,15 @@ static void page_crypt_complete(struct crypto_async_request *req, int res)
complete(&ecr->completion);
}
typedef enum {
FS_DECRYPT = 0,
FS_ENCRYPT,
} fscrypt_direction_t;
static int do_page_crypto(struct inode *inode,
fscrypt_direction_t rw, pgoff_t index,
struct page *src_page, struct page *dest_page,
gfp_t gfp_flags)
int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
u64 lblk_num, struct page *src_page,
struct page *dest_page, unsigned int len,
unsigned int offs, gfp_t gfp_flags)
{
struct {
__le64 index;
u8 padding[FS_XTS_TWEAK_SIZE - sizeof(__le64)];
} xts_tweak;
u8 padding[FS_IV_SIZE - sizeof(__le64)];
} iv;
struct skcipher_request *req = NULL;
DECLARE_FS_COMPLETION_RESULT(ecr);
struct scatterlist dst, src;
@@ -162,6 +157,18 @@ static int do_page_crypto(struct inode *inode,
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
BUG_ON(len == 0);
BUILD_BUG_ON(sizeof(iv) != FS_IV_SIZE);
BUILD_BUG_ON(AES_BLOCK_SIZE != FS_IV_SIZE);
iv.index = cpu_to_le64(lblk_num);
memset(iv.padding, 0, sizeof(iv.padding));
if (ci->ci_essiv_tfm != NULL) {
crypto_cipher_encrypt_one(ci->ci_essiv_tfm, (u8 *)&iv,
(u8 *)&iv);
}
req = skcipher_request_alloc(tfm, gfp_flags);
if (!req) {
printk_ratelimited(KERN_ERR
@@ -174,15 +181,11 @@ static int do_page_crypto(struct inode *inode,
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
page_crypt_complete, &ecr);
BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE);
xts_tweak.index = cpu_to_le64(index);
memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding));
sg_init_table(&dst, 1);
sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
sg_set_page(&dst, dest_page, len, offs);
sg_init_table(&src, 1);
sg_set_page(&src, src_page, PAGE_SIZE, 0);
skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, &xts_tweak);
sg_set_page(&src, src_page, len, offs);
skcipher_request_set_crypt(req, &src, &dst, len, &iv);
if (rw == FS_DECRYPT)
res = crypto_skcipher_decrypt(req);
else
@@ -202,53 +205,86 @@ static int do_page_crypto(struct inode *inode,
return 0;
}
static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
gfp_t gfp_flags)
{
ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
if (ctx->w.bounce_page == NULL)
return ERR_PTR(-ENOMEM);
ctx->flags |= FS_WRITE_PATH_FL;
ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL;
return ctx->w.bounce_page;
}
/**
* fscypt_encrypt_page() - Encrypts a page
* @inode: The inode for which the encryption should take place
* @plaintext_page: The page to encrypt. Must be locked.
* @gfp_flags: The gfp flag for memory allocation
* @inode: The inode for which the encryption should take place
* @page: The page to encrypt. Must be locked for bounce-page
* encryption.
* @len: Length of data to encrypt in @page and encrypted
* data in returned page.
* @offs: Offset of data within @page and returned
* page holding encrypted data.
* @lblk_num: Logical block number. This must be unique for multiple
* calls with same inode, except when overwriting
* previously written data.
* @gfp_flags: The gfp flag for memory allocation
*
* Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
* encryption context.
* Encrypts @page using the ctx encryption context. Performs encryption
* either in-place or into a newly allocated bounce page.
* Called on the page write path.
*
* Called on the page write path. The caller must call
* Bounce page allocation is the default.
* In this case, the contents of @page are encrypted and stored in an
* allocated bounce page. @page has to be locked and the caller must call
* fscrypt_restore_control_page() on the returned ciphertext page to
* release the bounce buffer and the encryption context.
*
* Return: An allocated page with the encrypted content on success. Else, an
* In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in
* fscrypt_operations. Here, the input-page is returned with its content
* encrypted.
*
* Return: A page with the encrypted content on success. Else, an
* error value or NULL.
*/
struct page *fscrypt_encrypt_page(struct inode *inode,
struct page *plaintext_page, gfp_t gfp_flags)
struct page *fscrypt_encrypt_page(const struct inode *inode,
struct page *page,
unsigned int len,
unsigned int offs,
u64 lblk_num, gfp_t gfp_flags)
{
struct fscrypt_ctx *ctx;
struct page *ciphertext_page = NULL;
struct page *ciphertext_page = page;
int err;
BUG_ON(!PageLocked(plaintext_page));
BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0);
if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) {
/* with inplace-encryption we just encrypt the page */
err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page,
ciphertext_page, len, offs,
gfp_flags);
if (err)
return ERR_PTR(err);
return ciphertext_page;
}
BUG_ON(!PageLocked(page));
ctx = fscrypt_get_ctx(inode, gfp_flags);
if (IS_ERR(ctx))
return (struct page *)ctx;
/* The encryption operation will require a bounce page. */
ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags);
if (IS_ERR(ciphertext_page))
goto errout;
ctx->w.control_page = plaintext_page;
err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index,
plaintext_page, ciphertext_page,
gfp_flags);
ctx->w.control_page = page;
err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num,
page, ciphertext_page, len, offs,
gfp_flags);
if (err) {
ciphertext_page = ERR_PTR(err);
goto errout;
@@ -265,8 +301,13 @@ errout:
EXPORT_SYMBOL(fscrypt_encrypt_page);
/**
* f2crypt_decrypt_page() - Decrypts a page in-place
* @page: The page to decrypt. Must be locked.
* fscrypt_decrypt_page() - Decrypts a page in-place
* @inode: The corresponding inode for the page to decrypt.
* @page: The page to decrypt. Must be locked in case
* it is a writeback page (FS_CFLG_OWN_PAGES unset).
* @len: Number of bytes in @page to be decrypted.
* @offs: Start of data in @page.
* @lblk_num: Logical block number.
*
* Decrypts page in-place using the ctx encryption context.
*
@@ -274,75 +315,17 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
*
* Return: Zero on success, non-zero otherwise.
*/
int fscrypt_decrypt_page(struct page *page)
int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
unsigned int len, unsigned int offs, u64 lblk_num)
{
BUG_ON(!PageLocked(page));
if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES))
BUG_ON(!PageLocked(page));
return do_page_crypto(page->mapping->host,
FS_DECRYPT, page->index, page, page, GFP_NOFS);
return fscrypt_do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page,
len, offs, GFP_NOFS);
}
EXPORT_SYMBOL(fscrypt_decrypt_page);
int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
struct fscrypt_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
int ret, err = 0;
BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
ctx = fscrypt_get_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT);
if (IS_ERR(ciphertext_page)) {
err = PTR_ERR(ciphertext_page);
goto errout;
}
while (len--) {
err = do_page_crypto(inode, FS_ENCRYPT, lblk,
ZERO_PAGE(0), ciphertext_page,
GFP_NOFS);
if (err)
goto errout;
bio = bio_alloc(GFP_NOWAIT, 1);
if (!bio) {
err = -ENOMEM;
goto errout;
}
bio->bi_bdev = inode->i_sb->s_bdev;
bio->bi_iter.bi_sector =
pblk << (inode->i_sb->s_blocksize_bits - 9);
ret = bio_add_page(bio, ciphertext_page,
inode->i_sb->s_blocksize, 0);
if (ret != inode->i_sb->s_blocksize) {
/* should never happen! */
WARN_ON(1);
bio_put(bio);
err = -EIO;
goto errout;
}
err = submit_bio_wait(WRITE, bio);
if ((err == 0) && bio->bi_error)
err = -EIO;
bio_put(bio);
if (err)
goto errout;
lblk++;
pblk++;
}
err = 0;
errout:
fscrypt_release_ctx(ctx);
return err;
}
EXPORT_SYMBOL(fscrypt_zeroout_range);
/*
* Validate dentries for encrypted directories to make sure we aren't
* potentially caching stale data after a key has been added or
@@ -351,7 +334,6 @@ EXPORT_SYMBOL(fscrypt_zeroout_range);
static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
{
struct dentry *dir;
struct fscrypt_info *ci;
int dir_has_key, cached_with_key;
if (flags & LOOKUP_RCU)
@@ -363,18 +345,11 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
ci = d_inode(dir)->i_crypt_info;
if (ci && ci->ci_keyring_key &&
(ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
(1 << KEY_FLAG_REVOKED) |
(1 << KEY_FLAG_DEAD))))
ci = NULL;
/* this should eventually be an flag in d_flags */
spin_lock(&dentry->d_lock);
cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
spin_unlock(&dentry->d_lock);
dir_has_key = (ci != NULL);
dir_has_key = (d_inode(dir)->i_crypt_info != NULL);
dput(dir);
/*
@@ -399,63 +374,6 @@ const struct dentry_operations fscrypt_d_ops = {
};
EXPORT_SYMBOL(fscrypt_d_ops);
/*
* Call fscrypt_decrypt_page on every single page, reusing the encryption
* context.
*/
static void completion_pages(struct work_struct *work)
{
struct fscrypt_ctx *ctx =
container_of(work, struct fscrypt_ctx, r.work);
struct bio *bio = ctx->r.bio;
struct bio_vec *bv;
int i;
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
int ret = fscrypt_decrypt_page(page);
if (ret) {
WARN_ON_ONCE(1);
SetPageError(page);
} else {
SetPageUptodate(page);
}
unlock_page(page);
}
fscrypt_release_ctx(ctx);
bio_put(bio);
}
void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
{
INIT_WORK(&ctx->r.work, completion_pages);
ctx->r.bio = bio;
queue_work(fscrypt_read_workqueue, &ctx->r.work);
}
EXPORT_SYMBOL(fscrypt_decrypt_bio_pages);
void fscrypt_pullback_bio_page(struct page **page, bool restore)
{
struct fscrypt_ctx *ctx;
struct page *bounce_page;
/* The bounce data pages are unmapped. */
if ((*page)->mapping)
return;
/* The bounce data page is unmapped. */
bounce_page = *page;
ctx = (struct fscrypt_ctx *)page_private(bounce_page);
/* restore control page */
*page = ctx->w.control_page;
if (restore)
fscrypt_restore_control_page(bounce_page);
}
EXPORT_SYMBOL(fscrypt_pullback_bio_page);
void fscrypt_restore_control_page(struct page *page)
{
struct fscrypt_ctx *ctx;
@@ -481,17 +399,22 @@ static void fscrypt_destroy(void)
/**
* fscrypt_initialize() - allocate major buffers for fs encryption.
* @cop_flags: fscrypt operations flags
*
* We only call this when we start accessing encrypted files, since it
* results in memory getting allocated that wouldn't otherwise be used.
*
* Return: Zero on success, non-zero otherwise.
*/
int fscrypt_initialize(void)
int fscrypt_initialize(unsigned int cop_flags)
{
int i, res = -ENOMEM;
if (fscrypt_bounce_page_pool)
/*
* No need to allocate a bounce page pool if there already is one or
* this FS won't use it.
*/
if (cop_flags & FS_CFLG_OWN_PAGES || fscrypt_bounce_page_pool)
return 0;
mutex_lock(&fscrypt_init_mutex);
@@ -520,7 +443,6 @@ fail:
mutex_unlock(&fscrypt_init_mutex);
return res;
}
EXPORT_SYMBOL(fscrypt_initialize);
/**
* fscrypt_init() - Set up for fs encryption.
@@ -562,6 +484,8 @@ static void __exit fscrypt_exit(void)
destroy_workqueue(fscrypt_read_workqueue);
kmem_cache_destroy(fscrypt_ctx_cachep);
kmem_cache_destroy(fscrypt_info_cachep);
fscrypt_essiv_cleanup();
}
module_exit(fscrypt_exit);

View File

@@ -12,7 +12,7 @@
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
#include <linux/fscrypto.h>
#include "fscrypt_private.h"
/**
* fname_crypt_complete() - completion callback for filename crypto
@@ -159,6 +159,8 @@ static int fname_decrypt(struct inode *inode,
static const char *lookup_table =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
/**
* digest_encode() -
*
@@ -209,7 +211,7 @@ static int digest_decode(const char *src, int len, char *dst)
return cp - dst;
}
u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen)
u32 fscrypt_fname_encrypted_size(const struct inode *inode, u32 ilen)
{
int padding = 32;
struct fscrypt_info *ci = inode->i_crypt_info;
@@ -227,14 +229,17 @@ EXPORT_SYMBOL(fscrypt_fname_encrypted_size);
* Allocates an output buffer that is sufficient for the crypto operation
* specified by the context and the direction.
*/
int fscrypt_fname_alloc_buffer(struct inode *inode,
int fscrypt_fname_alloc_buffer(const struct inode *inode,
u32 ilen, struct fscrypt_str *crypto_str)
{
unsigned int olen = fscrypt_fname_encrypted_size(inode, ilen);
u32 olen = fscrypt_fname_encrypted_size(inode, ilen);
const u32 max_encoded_len =
max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE),
1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name)));
crypto_str->len = olen;
if (olen < FS_FNAME_CRYPTO_DIGEST_SIZE * 2)
olen = FS_FNAME_CRYPTO_DIGEST_SIZE * 2;
olen = max(olen, max_encoded_len);
/*
* Allocated buffer can hold one more character to null-terminate the
* string
@@ -266,6 +271,10 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer);
*
* The caller must have allocated sufficient memory for the @oname string.
*
* If the key is available, we'll decrypt the disk name; otherwise, we'll encode
* it for presentation. Short names are directly base64-encoded, while long
* names are encoded in fscrypt_digested_name format.
*
* Return: 0 on success, -errno on failure
*/
int fscrypt_fname_disk_to_usr(struct inode *inode,
@@ -274,7 +283,7 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
struct fscrypt_str *oname)
{
const struct qstr qname = FSTR_TO_QSTR(iname);
char buf[24];
struct fscrypt_digested_name digested_name;
if (fscrypt_is_dot_dotdot(&qname)) {
oname->name[0] = '.';
@@ -289,20 +298,24 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
if (inode->i_crypt_info)
return fname_decrypt(inode, iname, oname);
if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) {
if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) {
oname->len = digest_encode(iname->name, iname->len,
oname->name);
return 0;
}
if (hash) {
memcpy(buf, &hash, 4);
memcpy(buf + 4, &minor_hash, 4);
digested_name.hash = hash;
digested_name.minor_hash = minor_hash;
} else {
memset(buf, 0, 8);
digested_name.hash = 0;
digested_name.minor_hash = 0;
}
memcpy(buf + 8, iname->name + iname->len - 16, 16);
memcpy(digested_name.digest,
FSCRYPT_FNAME_DIGEST(iname->name, iname->len),
FSCRYPT_FNAME_DIGEST_SIZE);
oname->name[0] = '_';
oname->len = 1 + digest_encode(buf, 24, oname->name + 1);
oname->len = 1 + digest_encode((const char *)&digested_name,
sizeof(digested_name), oname->name + 1);
return 0;
}
EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
@@ -332,14 +345,39 @@ int fscrypt_fname_usr_to_disk(struct inode *inode,
* in a directory. Consequently, a user space name cannot be mapped to
* a disk-space name
*/
return -EACCES;
return -ENOKEY;
}
EXPORT_SYMBOL(fscrypt_fname_usr_to_disk);
/**
* fscrypt_setup_filename() - prepare to search a possibly encrypted directory
* @dir: the directory that will be searched
* @iname: the user-provided filename being searched for
* @lookup: 1 if we're allowed to proceed without the key because it's
* ->lookup() or we're finding the dir_entry for deletion; 0 if we cannot
* proceed without the key because we're going to create the dir_entry.
* @fname: the filename information to be filled in
*
* Given a user-provided filename @iname, this function sets @fname->disk_name
* to the name that would be stored in the on-disk directory entry, if possible.
* If the directory is unencrypted this is simply @iname. Else, if we have the
* directory's encryption key, then @iname is the plaintext, so we encrypt it to
* get the disk_name.
*
* Else, for keyless @lookup operations, @iname is the presented ciphertext, so
* we decode it to get either the ciphertext disk_name (for short names) or the
* fscrypt_digested_name (for long names). Non-@lookup operations will be
* impossible in this case, so we fail them with ENOKEY.
*
* If successful, fscrypt_free_filename() must be called later to clean up.
*
* Return: 0 on success, -errno on failure
*/
int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct fscrypt_name *fname)
{
int ret = 0, bigname = 0;
int ret;
int digested;
memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
@@ -350,7 +388,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
fname->disk_name.len = iname->len;
return 0;
}
ret = get_crypt_info(dir);
ret = fscrypt_get_encryption_info(dir);
if (ret && ret != -EOPNOTSUPP)
return ret;
@@ -367,31 +405,43 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
return 0;
}
if (!lookup)
return -EACCES;
return -ENOKEY;
/*
* We don't have the key and we are doing a lookup; decode the
* user-supplied name
*/
if (iname->name[0] == '_')
bigname = 1;
if ((bigname && (iname->len != 33)) || (!bigname && (iname->len > 43)))
return -ENOENT;
if (iname->name[0] == '_') {
if (iname->len !=
1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name)))
return -ENOENT;
digested = 1;
} else {
if (iname->len >
BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE))
return -ENOENT;
digested = 0;
}
fname->crypto_buf.name = kmalloc(32, GFP_KERNEL);
fname->crypto_buf.name =
kmalloc(max_t(size_t, FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE,
sizeof(struct fscrypt_digested_name)),
GFP_KERNEL);
if (fname->crypto_buf.name == NULL)
return -ENOMEM;
ret = digest_decode(iname->name + bigname, iname->len - bigname,
ret = digest_decode(iname->name + digested, iname->len - digested,
fname->crypto_buf.name);
if (ret < 0) {
ret = -ENOENT;
goto errout;
}
fname->crypto_buf.len = ret;
if (bigname) {
memcpy(&fname->hash, fname->crypto_buf.name, 4);
memcpy(&fname->minor_hash, fname->crypto_buf.name + 4, 4);
if (digested) {
const struct fscrypt_digested_name *n =
(const void *)fname->crypto_buf.name;
fname->hash = n->hash;
fname->minor_hash = n->minor_hash;
} else {
fname->disk_name.name = fname->crypto_buf.name;
fname->disk_name.len = fname->crypto_buf.len;
@@ -403,12 +453,3 @@ errout:
return ret;
}
EXPORT_SYMBOL(fscrypt_setup_filename);
void fscrypt_free_filename(struct fscrypt_name *fname)
{
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
}
EXPORT_SYMBOL(fscrypt_free_filename);

107
fs/crypto/fscrypt_private.h Normal file
View File

@@ -0,0 +1,107 @@
/*
* fscrypt_private.h
*
* Copyright (C) 2015, Google, Inc.
*
* This contains encryption key functions.
*
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
*/
#ifndef _FSCRYPT_PRIVATE_H
#define _FSCRYPT_PRIVATE_H
#include <linux/fscrypt_supp.h>
#include <crypto/hash.h>
/* Encryption parameters */
#define FS_IV_SIZE 16
#define FS_AES_128_ECB_KEY_SIZE 16
#define FS_AES_128_CBC_KEY_SIZE 16
#define FS_AES_128_CTS_KEY_SIZE 16
#define FS_AES_256_GCM_KEY_SIZE 32
#define FS_AES_256_CBC_KEY_SIZE 32
#define FS_AES_256_CTS_KEY_SIZE 32
#define FS_AES_256_XTS_KEY_SIZE 64
#define FS_KEY_DERIVATION_NONCE_SIZE 16
/**
* Encryption context for inode
*
* Protector format:
* 1 byte: Protector format (1 = this version)
* 1 byte: File contents encryption mode
* 1 byte: File names encryption mode
* 1 byte: Flags
* 8 bytes: Master Key descriptor
* 16 bytes: Encryption Key derivation nonce
*/
struct fscrypt_context {
u8 format;
u8 contents_encryption_mode;
u8 filenames_encryption_mode;
u8 flags;
u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
} __packed;
#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
/*
* A pointer to this structure is stored in the file system's in-core
* representation of an inode.
*/
struct fscrypt_info {
u8 ci_data_mode;
u8 ci_filename_mode;
u8 ci_flags;
struct crypto_skcipher *ci_ctfm;
struct crypto_cipher *ci_essiv_tfm;
u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
};
typedef enum {
FS_DECRYPT = 0,
FS_ENCRYPT,
} fscrypt_direction_t;
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002
struct fscrypt_completion_result {
struct completion completion;
int res;
};
#define DECLARE_FS_COMPLETION_RESULT(ecr) \
struct fscrypt_completion_result ecr = { \
COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 }
/* bio stuffs */
#define REQ_OP_READ READ
#define REQ_OP_WRITE WRITE
#define bio_op(bio) ((bio)->bi_rw & 1)
static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
unsigned op_flags)
{
bio->bi_rw = op | op_flags;
}
/* crypto.c */
extern int fscrypt_initialize(unsigned int cop_flags);
extern struct workqueue_struct *fscrypt_read_workqueue;
extern int fscrypt_do_page_crypto(const struct inode *inode,
fscrypt_direction_t rw, u64 lblk_num,
struct page *src_page,
struct page *dest_page,
unsigned int len, unsigned int offs,
gfp_t gfp_flags);
extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
gfp_t gfp_flags);
/* keyinfo.c */
extern void __exit fscrypt_essiv_cleanup(void);
#endif /* _FSCRYPT_PRIVATE_H */

View File

@@ -10,7 +10,12 @@
#include <keys/user-type.h>
#include <linux/scatterlist.h>
#include <linux/fscrypto.h>
#include <linux/ratelimit.h>
#include <crypto/aes.h>
#include <crypto/sha.h>
#include "fscrypt_private.h"
static struct crypto_shash *essiv_hash_tfm;
static void derive_crypt_complete(struct crypto_async_request *req, int rc)
{
@@ -27,13 +32,13 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
* derive_key_aes() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivation.
* @source_key: Source key to which to apply derivation.
* @derived_key: Derived key.
* @derived_raw_key: Derived raw key.
*
* Return: Zero on success; non-zero otherwise.
*/
static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
u8 source_key[FS_AES_256_XTS_KEY_SIZE],
u8 derived_key[FS_AES_256_XTS_KEY_SIZE])
const struct fscrypt_key *source_key,
u8 derived_raw_key[FS_MAX_KEY_SIZE])
{
int res = 0;
struct skcipher_request *req = NULL;
@@ -60,10 +65,10 @@ static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
if (res < 0)
goto out;
sg_init_one(&src_sg, source_key, FS_AES_256_XTS_KEY_SIZE);
sg_init_one(&dst_sg, derived_key, FS_AES_256_XTS_KEY_SIZE);
skcipher_request_set_crypt(req, &src_sg, &dst_sg,
FS_AES_256_XTS_KEY_SIZE, NULL);
sg_init_one(&src_sg, source_key->raw, source_key->size);
sg_init_one(&dst_sg, derived_raw_key, source_key->size);
skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size,
NULL);
res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
@@ -77,28 +82,25 @@ out:
static int validate_user_key(struct fscrypt_info *crypt_info,
struct fscrypt_context *ctx, u8 *raw_key,
u8 *prefix, int prefix_size)
const char *prefix, int min_keysize)
{
u8 *full_key_descriptor;
char *description;
struct key *keyring_key;
struct fscrypt_key *master_key;
const struct user_key_payload *ukp;
int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1;
int res;
full_key_descriptor = kmalloc(full_key_len, GFP_NOFS);
if (!full_key_descriptor)
description = kasprintf(GFP_NOFS, "%s%*phN", prefix,
FS_KEY_DESCRIPTOR_SIZE,
ctx->master_key_descriptor);
if (!description)
return -ENOMEM;
memcpy(full_key_descriptor, prefix, prefix_size);
sprintf(full_key_descriptor + prefix_size,
"%*phN", FS_KEY_DESCRIPTOR_SIZE,
ctx->master_key_descriptor);
full_key_descriptor[full_key_len - 1] = '\0';
keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
kfree(full_key_descriptor);
keyring_key = request_key(&key_type_logon, description, NULL);
kfree(description);
if (IS_ERR(keyring_key))
return PTR_ERR(keyring_key);
down_read(&keyring_key->sem);
if (keyring_key->type != &key_type_logon) {
printk_once(KERN_WARNING
@@ -106,66 +108,68 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
res = -ENOKEY;
goto out;
}
down_read(&keyring_key->sem);
ukp = user_key_payload(keyring_key);
if (ukp->datalen != sizeof(struct fscrypt_key)) {
res = -EINVAL;
up_read(&keyring_key->sem);
goto out;
}
master_key = (struct fscrypt_key *)ukp->data;
BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
if (master_key->size != FS_AES_256_XTS_KEY_SIZE) {
if (master_key->size < min_keysize || master_key->size > FS_MAX_KEY_SIZE
|| master_key->size % AES_BLOCK_SIZE != 0) {
printk_once(KERN_WARNING
"%s: key size incorrect: %d\n",
__func__, master_key->size);
res = -ENOKEY;
up_read(&keyring_key->sem);
goto out;
}
res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
up_read(&keyring_key->sem);
if (res)
goto out;
crypt_info->ci_keyring_key = keyring_key;
return 0;
res = derive_key_aes(ctx->nonce, master_key, raw_key);
out:
up_read(&keyring_key->sem);
key_put(keyring_key);
return res;
}
static const struct {
const char *cipher_str;
int keysize;
} available_modes[] = {
[FS_ENCRYPTION_MODE_AES_256_XTS] = { "xts(aes)",
FS_AES_256_XTS_KEY_SIZE },
[FS_ENCRYPTION_MODE_AES_256_CTS] = { "cts(cbc(aes))",
FS_AES_256_CTS_KEY_SIZE },
[FS_ENCRYPTION_MODE_AES_128_CBC] = { "cbc(aes)",
FS_AES_128_CBC_KEY_SIZE },
[FS_ENCRYPTION_MODE_AES_128_CTS] = { "cts(cbc(aes))",
FS_AES_128_CTS_KEY_SIZE },
};
static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode,
const char **cipher_str_ret, int *keysize_ret)
{
u32 mode;
if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
pr_warn_ratelimited("fscrypt: inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)\n",
inode->i_ino,
ci->ci_data_mode, ci->ci_filename_mode);
return -EINVAL;
}
if (S_ISREG(inode->i_mode)) {
if (ci->ci_data_mode == FS_ENCRYPTION_MODE_AES_256_XTS) {
*cipher_str_ret = "xts(aes)";
*keysize_ret = FS_AES_256_XTS_KEY_SIZE;
return 0;
}
pr_warn_once("fscrypto: unsupported contents encryption mode "
"%d for inode %lu\n",
ci->ci_data_mode, inode->i_ino);
return -ENOKEY;
mode = ci->ci_data_mode;
} else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
mode = ci->ci_filename_mode;
} else {
WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
inode->i_ino, (inode->i_mode & S_IFMT));
return -EINVAL;
}
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
if (ci->ci_filename_mode == FS_ENCRYPTION_MODE_AES_256_CTS) {
*cipher_str_ret = "cts(cbc(aes))";
*keysize_ret = FS_AES_256_CTS_KEY_SIZE;
return 0;
}
pr_warn_once("fscrypto: unsupported filenames encryption mode "
"%d for inode %lu\n",
ci->ci_filename_mode, inode->i_ino);
return -ENOKEY;
}
pr_warn_once("fscrypto: unsupported file type %d for inode %lu\n",
(inode->i_mode & S_IFMT), inode->i_ino);
return -ENOKEY;
*cipher_str_ret = available_modes[mode].cipher_str;
*keysize_ret = available_modes[mode].keysize;
return 0;
}
static void put_crypt_info(struct fscrypt_info *ci)
@@ -173,12 +177,78 @@ static void put_crypt_info(struct fscrypt_info *ci)
if (!ci)
return;
key_put(ci->ci_keyring_key);
crypto_free_skcipher(ci->ci_ctfm);
crypto_free_cipher(ci->ci_essiv_tfm);
kmem_cache_free(fscrypt_info_cachep, ci);
}
int get_crypt_info(struct inode *inode)
static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
{
struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm);
/* init hash transform on demand */
if (unlikely(!tfm)) {
struct crypto_shash *prev_tfm;
tfm = crypto_alloc_shash("sha256", 0, 0);
if (IS_ERR(tfm)) {
pr_warn_ratelimited("fscrypt: error allocating SHA-256 transform: %ld\n",
PTR_ERR(tfm));
return PTR_ERR(tfm);
}
prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
if (prev_tfm) {
crypto_free_shash(tfm);
tfm = prev_tfm;
}
}
{
SHASH_DESC_ON_STACK(desc, tfm);
desc->tfm = tfm;
desc->flags = 0;
return crypto_shash_digest(desc, key, keysize, salt);
}
}
static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key,
int keysize)
{
int err;
struct crypto_cipher *essiv_tfm;
u8 salt[SHA256_DIGEST_SIZE];
essiv_tfm = crypto_alloc_cipher("aes", 0, 0);
if (IS_ERR(essiv_tfm))
return PTR_ERR(essiv_tfm);
ci->ci_essiv_tfm = essiv_tfm;
err = derive_essiv_salt(raw_key, keysize, salt);
if (err)
goto out;
/*
* Using SHA256 to derive the salt/key will result in AES-256 being
* used for IV generation. File contents encryption will still use the
* configured keysize (AES-128) nevertheless.
*/
err = crypto_cipher_setkey(essiv_tfm, salt, sizeof(salt));
if (err)
goto out;
out:
memzero_explicit(salt, sizeof(salt));
return err;
}
void __exit fscrypt_essiv_cleanup(void)
{
crypto_free_shash(essiv_hash_tfm);
}
int fscrypt_get_encryption_info(struct inode *inode)
{
struct fscrypt_info *crypt_info;
struct fscrypt_context ctx;
@@ -188,30 +258,24 @@ int get_crypt_info(struct inode *inode)
u8 *raw_key = NULL;
int res;
res = fscrypt_initialize();
if (inode->i_crypt_info)
return 0;
res = fscrypt_initialize(inode->i_sb->s_cop->flags);
if (res)
return res;
if (!inode->i_sb->s_cop->get_context)
return -EOPNOTSUPP;
retry:
crypt_info = ACCESS_ONCE(inode->i_crypt_info);
if (crypt_info) {
if (!crypt_info->ci_keyring_key ||
key_validate(crypt_info->ci_keyring_key) == 0)
return 0;
fscrypt_put_encryption_info(inode, crypt_info);
goto retry;
}
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
if (!fscrypt_dummy_context_enabled(inode))
if (!fscrypt_dummy_context_enabled(inode) ||
inode->i_sb->s_cop->is_encrypted(inode))
return res;
/* Fake up a context for an unencrypted directory */
memset(&ctx, 0, sizeof(ctx));
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
} else if (res != sizeof(ctx)) {
return -EINVAL;
}
@@ -230,7 +294,7 @@ retry:
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
crypt_info->ci_keyring_key = NULL;
crypt_info->ci_essiv_tfm = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
@@ -247,20 +311,12 @@ retry:
if (!raw_key)
goto out;
if (fscrypt_dummy_context_enabled(inode)) {
memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
goto got_key;
}
res = validate_user_key(crypt_info, &ctx, raw_key,
FS_KEY_DESC_PREFIX, FS_KEY_DESC_PREFIX_SIZE);
res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX,
keysize);
if (res && inode->i_sb->s_cop->key_prefix) {
u8 *prefix = NULL;
int prefix_size, res2;
prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix);
res2 = validate_user_key(crypt_info, &ctx, raw_key,
prefix, prefix_size);
int res2 = validate_user_key(crypt_info, &ctx, raw_key,
inode->i_sb->s_cop->key_prefix,
keysize);
if (res2) {
if (res2 == -ENOKEY)
res = -ENOKEY;
@@ -269,30 +325,35 @@ retry:
} else if (res) {
goto out;
}
got_key:
ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
printk(KERN_DEBUG
"%s: error %d (inode %u) allocating crypto tfm\n",
__func__, res, (unsigned) inode->i_ino);
pr_debug("%s: error %d (inode %lu) allocating crypto tfm\n",
__func__, res, inode->i_ino);
goto out;
}
crypt_info->ci_ctfm = ctfm;
crypto_skcipher_clear_flags(ctfm, ~0);
crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
/*
* if the provided key is longer than keysize, we use the first
* keysize bytes of the derived key only
*/
res = crypto_skcipher_setkey(ctfm, raw_key, keysize);
if (res)
goto out;
kzfree(raw_key);
raw_key = NULL;
if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) {
put_crypt_info(crypt_info);
goto retry;
if (S_ISREG(inode->i_mode) &&
crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
res = init_essiv_generator(crypt_info, raw_key, keysize);
if (res) {
pr_debug("%s: error %d (inode %lu) allocating essiv tfm\n",
__func__, res, inode->i_ino);
goto out;
}
}
return 0;
if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL)
crypt_info = NULL;
out:
if (res == -ENOKEY)
res = 0;
@@ -300,6 +361,7 @@ out:
kzfree(raw_key);
return res;
}
EXPORT_SYMBOL(fscrypt_get_encryption_info);
void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
{
@@ -317,17 +379,3 @@ void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
put_crypt_info(ci);
}
EXPORT_SYMBOL(fscrypt_put_encryption_info);
int fscrypt_get_encryption_info(struct inode *inode)
{
struct fscrypt_info *ci = inode->i_crypt_info;
if (!ci ||
(ci->ci_keyring_key &&
(ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
(1 << KEY_FLAG_REVOKED) |
(1 << KEY_FLAG_DEAD)))))
return get_crypt_info(inode);
return 0;
}
EXPORT_SYMBOL(fscrypt_get_encryption_info);

View File

@@ -10,76 +10,37 @@
#include <linux/random.h>
#include <linux/string.h>
#include <linux/fscrypto.h>
#include <linux/mount.h>
static int inode_has_encryption_context(struct inode *inode)
{
if (!inode->i_sb->s_cop->get_context)
return 0;
return (inode->i_sb->s_cop->get_context(inode, NULL, 0L) > 0);
}
#include "fscrypt_private.h"
/*
* check whether the policy is consistent with the encryption context
* for the inode
* check whether an encryption policy is consistent with an encryption context
*/
static int is_encryption_context_consistent_with_policy(struct inode *inode,
static bool is_encryption_context_consistent_with_policy(
const struct fscrypt_context *ctx,
const struct fscrypt_policy *policy)
{
struct fscrypt_context ctx;
int res;
if (!inode->i_sb->s_cop->get_context)
return 0;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res != sizeof(ctx))
return 0;
return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(ctx.flags == policy->flags) &&
(ctx.contents_encryption_mode ==
policy->contents_encryption_mode) &&
(ctx.filenames_encryption_mode ==
policy->filenames_encryption_mode));
return memcmp(ctx->master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(ctx->flags == policy->flags) &&
(ctx->contents_encryption_mode ==
policy->contents_encryption_mode) &&
(ctx->filenames_encryption_mode ==
policy->filenames_encryption_mode);
}
static int create_encryption_context_from_policy(struct inode *inode,
const struct fscrypt_policy *policy)
{
struct fscrypt_context ctx;
int res;
if (!inode->i_sb->s_cop->set_context)
return -EOPNOTSUPP;
if (inode->i_sb->s_cop->prepare_context) {
res = inode->i_sb->s_cop->prepare_context(inode);
if (res)
return res;
}
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
if (!fscrypt_valid_contents_enc_mode(
policy->contents_encryption_mode)) {
printk(KERN_WARNING
"%s: Invalid contents encryption mode %d\n", __func__,
policy->contents_encryption_mode);
if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
policy->filenames_encryption_mode))
return -EINVAL;
}
if (!fscrypt_valid_filenames_enc_mode(
policy->filenames_encryption_mode)) {
printk(KERN_WARNING
"%s: Invalid filenames encryption mode %d\n", __func__,
policy->filenames_encryption_mode);
return -EINVAL;
}
if (policy->flags & ~FS_POLICY_FLAGS_VALID)
return -EINVAL;
@@ -93,16 +54,20 @@ static int create_encryption_context_from_policy(struct inode *inode,
return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
}
int fscrypt_process_policy(struct file *filp,
const struct fscrypt_policy *policy)
int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
{
struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
int ret;
struct fscrypt_context ctx;
if (copy_from_user(&policy, arg, sizeof(policy)))
return -EFAULT;
if (!inode_owner_or_capable(inode))
return -EACCES;
if (policy->version != 0)
if (policy.version != 0)
return -EINVAL;
ret = mnt_want_write_file(filp);
@@ -111,22 +76,23 @@ int fscrypt_process_policy(struct file *filp,
inode_lock(inode);
if (!inode_has_encryption_context(inode)) {
ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (ret == -ENODATA) {
if (!S_ISDIR(inode->i_mode))
ret = -EINVAL;
else if (!inode->i_sb->s_cop->empty_dir)
ret = -EOPNOTSUPP;
ret = -ENOTDIR;
else if (!inode->i_sb->s_cop->empty_dir(inode))
ret = -ENOTEMPTY;
else
ret = create_encryption_context_from_policy(inode,
policy);
} else if (!is_encryption_context_consistent_with_policy(inode,
policy)) {
printk(KERN_WARNING
"%s: Policy inconsistent with encryption context\n",
__func__);
ret = -EINVAL;
&policy);
} else if (ret == sizeof(ctx) &&
is_encryption_context_consistent_with_policy(&ctx,
&policy)) {
/* The file already uses the same encryption policy. */
ret = 0;
} else if (ret >= 0 || ret == -ERANGE) {
/* The file already uses a different encryption policy. */
ret = -EEXIST;
}
inode_unlock(inode);
@@ -134,49 +100,94 @@ int fscrypt_process_policy(struct file *filp,
mnt_drop_write_file(filp);
return ret;
}
EXPORT_SYMBOL(fscrypt_process_policy);
EXPORT_SYMBOL(fscrypt_ioctl_set_policy);
int fscrypt_get_policy(struct inode *inode, struct fscrypt_policy *policy)
int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
{
struct inode *inode = file_inode(filp);
struct fscrypt_context ctx;
struct fscrypt_policy policy;
int res;
if (!inode->i_sb->s_cop->get_context ||
!inode->i_sb->s_cop->is_encrypted(inode))
if (!inode->i_sb->s_cop->is_encrypted(inode))
return -ENODATA;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0 && res != -ERANGE)
return res;
if (res != sizeof(ctx))
return -ENODATA;
return -EINVAL;
if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
return -EINVAL;
policy->version = 0;
policy->contents_encryption_mode = ctx.contents_encryption_mode;
policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
policy->flags = ctx.flags;
memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
policy.version = 0;
policy.contents_encryption_mode = ctx.contents_encryption_mode;
policy.filenames_encryption_mode = ctx.filenames_encryption_mode;
policy.flags = ctx.flags;
memcpy(policy.master_key_descriptor, ctx.master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
if (copy_to_user(arg, &policy, sizeof(policy)))
return -EFAULT;
return 0;
}
EXPORT_SYMBOL(fscrypt_get_policy);
EXPORT_SYMBOL(fscrypt_ioctl_get_policy);
/**
* fscrypt_has_permitted_context() - is a file's encryption policy permitted
* within its directory?
*
* @parent: inode for parent directory
* @child: inode for file being looked up, opened, or linked into @parent
*
* Filesystems must call this before permitting access to an inode in a
* situation where the parent directory is encrypted (either before allowing
* ->lookup() to succeed, or for a regular file before allowing it to be opened)
* and before any operation that involves linking an inode into an encrypted
* directory, including link, rename, and cross rename. It enforces the
* constraint that within a given encrypted directory tree, all files use the
* same encryption policy. The pre-access check is needed to detect potentially
* malicious offline violations of this constraint, while the link and rename
* checks are needed to prevent online violations of this constraint.
*
* Return: 1 if permitted, 0 if forbidden. If forbidden, the caller must fail
* the filesystem operation with EPERM.
*/
int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
{
struct fscrypt_info *parent_ci, *child_ci;
const struct fscrypt_operations *cops = parent->i_sb->s_cop;
const struct fscrypt_info *parent_ci, *child_ci;
struct fscrypt_context parent_ctx, child_ctx;
int res;
if ((parent == NULL) || (child == NULL)) {
printk(KERN_ERR "parent %p child %p\n", parent, child);
BUG_ON(1);
}
/* no restrictions if the parent directory is not encrypted */
if (!parent->i_sb->s_cop->is_encrypted(parent))
/* No restrictions on file types which are never encrypted */
if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
!S_ISLNK(child->i_mode))
return 1;
/* if the child directory is not encrypted, this is always a problem */
if (!parent->i_sb->s_cop->is_encrypted(child))
/* No restrictions if the parent directory is unencrypted */
if (!cops->is_encrypted(parent))
return 1;
/* Encrypted directories must not contain unencrypted files */
if (!cops->is_encrypted(child))
return 0;
/*
* Both parent and child are encrypted, so verify they use the same
* encryption policy. Compare the fscrypt_info structs if the keys are
* available, otherwise retrieve and compare the fscrypt_contexts.
*
* Note that the fscrypt_context retrieval will be required frequently
* when accessing an encrypted directory tree without the key.
* Performance-wise this is not a big deal because we already don't
* really optimize for file access without the key (to the extent that
* such access is even possible), given that any attempted access
* already causes a fscrypt_context retrieval and keyring search.
*
* In any case, if an unexpected error occurs, fall back to "forbidden".
*/
res = fscrypt_get_encryption_info(parent);
if (res)
return 0;
@@ -185,17 +196,32 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
return 0;
parent_ci = parent->i_crypt_info;
child_ci = child->i_crypt_info;
if (!parent_ci && !child_ci)
return 1;
if (!parent_ci || !child_ci)
if (parent_ci && child_ci) {
return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
(parent_ci->ci_filename_mode ==
child_ci->ci_filename_mode) &&
(parent_ci->ci_flags == child_ci->ci_flags);
}
res = cops->get_context(parent, &parent_ctx, sizeof(parent_ctx));
if (res != sizeof(parent_ctx))
return 0;
return (memcmp(parent_ci->ci_master_key,
child_ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
(parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
(parent_ci->ci_flags == child_ci->ci_flags));
res = cops->get_context(child, &child_ctx, sizeof(child_ctx));
if (res != sizeof(child_ctx))
return 0;
return memcmp(parent_ctx.master_key_descriptor,
child_ctx.master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ctx.contents_encryption_mode ==
child_ctx.contents_encryption_mode) &&
(parent_ctx.filenames_encryption_mode ==
child_ctx.filenames_encryption_mode) &&
(parent_ctx.flags == child_ctx.flags);
}
EXPORT_SYMBOL(fscrypt_has_permitted_context);
@@ -204,9 +230,9 @@ EXPORT_SYMBOL(fscrypt_has_permitted_context);
* @parent: Parent inode from which the context is inherited.
* @child: Child inode that inherits the context from @parent.
* @fs_data: private data given by FS.
* @preload: preload child i_crypt_info
* @preload: preload child i_crypt_info if true
*
* Return: Zero on success, non-zero otherwise
* Return: 0 on success, -errno on failure
*/
int fscrypt_inherit_context(struct inode *parent, struct inode *child,
void *fs_data, bool preload)
@@ -215,9 +241,6 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
struct fscrypt_info *ci;
int res;
if (!parent->i_sb->s_cop->set_context)
return -EOPNOTSUPP;
res = fscrypt_get_encryption_info(parent);
if (res < 0)
return res;
@@ -227,19 +250,11 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
return -ENOKEY;
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
if (fscrypt_dummy_context_enabled(parent)) {
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
res = 0;
} else {
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE);
}
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
res = parent->i_sb->s_cop->set_context(child, &ctx,
sizeof(ctx), fs_data);

View File

@@ -2,7 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o
f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
f2fs-y += shrinker.o extent_cache.o
f2fs-y += shrinker.o extent_cache.o sysfs.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o

View File

@@ -210,15 +210,16 @@ static int __f2fs_set_acl(struct inode *inode, int type,
void *value = NULL;
size_t size = 0;
int error;
umode_t mode = inode->i_mode;
switch (type) {
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
if (acl && !ipage) {
error = posix_acl_update_mode(inode, &mode, &acl);
if (error)
return error;
set_acl_inode(inode, inode->i_mode);
set_acl_inode(inode, mode);
}
break;
@@ -236,7 +237,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size);
if (IS_ERR(value)) {
clear_inode_flag(inode, FI_ACL_MODE);
return (int)PTR_ERR(value);
return PTR_ERR(value);
}
}

View File

@@ -31,7 +31,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
set_ckpt_flags(sbi, CP_ERROR_FLAG);
sbi->sb->s_flags |= MS_RDONLY;
if (!end_io)
f2fs_flush_merged_bios(sbi);
f2fs_flush_merged_writes(sbi);
}
/*
@@ -163,6 +163,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.op_flags = sync ? (REQ_SYNC | REQ_META | REQ_PRIO) :
REQ_RAHEAD,
.encrypted_page = NULL,
.in_list = false,
};
struct blk_plug plug;
@@ -208,12 +209,10 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
}
fio.page = page;
fio.old_blkaddr = fio.new_blkaddr;
f2fs_submit_page_mbio(&fio);
f2fs_submit_page_bio(&fio);
f2fs_put_page(page, 0);
}
out:
f2fs_submit_merged_bio(sbi, META, READ);
blk_finish_plug(&plug);
return blkno - start;
}
@@ -232,8 +231,9 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
static int __f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc,
enum iostat_type io_type)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
@@ -246,16 +246,17 @@ static int f2fs_write_meta_page(struct page *page,
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
write_meta_page(sbi, page);
write_meta_page(sbi, page, io_type);
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
f2fs_submit_merged_write_cond(sbi, page->mapping->host,
0, page->index, META);
unlock_page(page);
if (unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, META, WRITE);
f2fs_submit_merged_write(sbi, META);
return 0;
@@ -264,23 +265,33 @@ redirty_out:
return AOP_WRITEPAGE_ACTIVATE;
}
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
{
return __f2fs_write_meta_page(page, wbc, FS_META_IO);
}
static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
long diff, written;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto skip_write;
/* collect a number of dirty meta pages and write together */
if (wbc->for_kupdate ||
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, META);
/* if locked failed, cp will flush dirty pages instead */
if (!mutex_trylock(&sbi->cp_mutex))
goto skip_write;
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = sync_meta_pages(sbi, META, wbc->nr_to_write);
written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
mutex_unlock(&sbi->cp_mutex);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
@@ -292,7 +303,7 @@ skip_write:
}
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write)
long nr_to_write, enum iostat_type io_type)
{
struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
@@ -343,7 +354,7 @@ continue_unlock:
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
if (mapping->a_ops->writepage(page, &wbc)) {
if (__f2fs_write_meta_page(page, &wbc, io_type)) {
unlock_page(page);
break;
}
@@ -357,7 +368,7 @@ continue_unlock:
}
stop:
if (nwritten)
f2fs_submit_merged_bio(sbi, type, WRITE);
f2fs_submit_merged_write(sbi, type);
blk_finish_plug(&plug);
@@ -494,6 +505,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
#endif
@@ -566,7 +578,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
if (ni.blk_addr != NULL_ADDR) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: orphan failed (ino=%x), run fsck to fix.",
"%s: orphan failed (ino=%x) by kernel, retry mount.",
__func__, ino);
return -EIO;
}
@@ -577,11 +589,24 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blocks, i, j;
int err;
unsigned int s_flags = sbi->sb->s_flags;
int err = 0;
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
if (s_flags & MS_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~MS_RDONLY;
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
/* Turn on quotas so that they are updated correctly */
f2fs_enable_quota_files(sbi);
#endif
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
@@ -597,14 +622,21 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
err = recover_orphan_inode(sbi, ino);
if (err) {
f2fs_put_page(page, 1);
return err;
goto out;
}
}
f2fs_put_page(page, 1);
}
/* clear Orphan Flag */
clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
return 0;
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
f2fs_quota_off_umount(sbi->sb);
#endif
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
return err;
}
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
@@ -676,14 +708,13 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
if (crc_offset >= blk_size) {
if (crc_offset > (blk_size - sizeof(__le32))) {
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
}
crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
+ crc_offset)));
crc = cur_cp_crc(*cp_block);
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
@@ -816,7 +847,9 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type)
return;
set_inode_flag(inode, flag);
list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
if (!f2fs_is_volatile_file(inode))
list_add_tail(&F2FS_I(inode)->dirty_list,
&sbi->inode_list[type]);
stat_inc_dirty_inode(sbi, type);
}
@@ -874,6 +907,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
struct inode *inode;
struct f2fs_inode_info *fi;
bool is_dir = (type == DIR_INODE);
unsigned long ino = 0;
trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
get_pages(sbi, is_dir ?
@@ -896,14 +930,30 @@ retry:
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[type]);
if (inode) {
unsigned long cur_ino = inode->i_ino;
if (is_dir)
F2FS_I(inode)->cp_task = current;
filemap_fdatawrite(inode->i_mapping);
if (is_dir)
F2FS_I(inode)->cp_task = NULL;
iput(inode);
/* We need to give cpu to another writers. */
if (ino == cur_ino) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
cond_resched();
} else {
ino = cur_ino;
}
} else {
/*
* We should submit bio, since it exists several
* wribacking dentry pages in the freeing inode.
*/
f2fs_submit_merged_bio(sbi, DATA, WRITE);
f2fs_submit_merged_write(sbi, DATA);
cond_resched();
}
goto retry;
@@ -941,6 +991,19 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
return 0;
}
static void __prepare_cp_block(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
nid_t last_nid = nm_i->next_scan_nid;
next_free_nid(sbi, &last_nid);
ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
ckpt->next_free_nid = cpu_to_le32(last_nid);
}
/*
* Freeze all the FS-operations for checkpoint.
*/
@@ -964,33 +1027,47 @@ retry_flush_dents:
err = sync_dirty_inodes(sbi, DIR_INODE);
if (err)
goto out;
goto retry_flush_dents;
}
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
goto out;
cond_resched();
goto retry_flush_dents;
}
/*
* POR: we should ensure that there are no dirty node pages
* until finishing nat/sit flush.
* until finishing nat/sit flush. inode->i_blocks can be updated.
*/
down_write(&sbi->node_change);
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
goto out;
cond_resched();
goto retry_flush_dents;
}
retry_flush_nodes:
down_write(&sbi->node_write);
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
err = sync_node_pages(sbi, &wbc);
err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
goto out;
}
cond_resched();
goto retry_flush_nodes;
}
/*
* sbi->node_change is used only for AIO write_begin path which produces
* dirty node blocks and some checkpoint values by block allocation.
*/
__prepare_cp_block(sbi);
up_write(&sbi->node_change);
out:
blk_finish_plug(&plug);
return err;
@@ -999,8 +1076,6 @@ out:
static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
build_free_nids(sbi, false);
f2fs_unlock_all(sbi);
}
@@ -1023,15 +1098,24 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long flags;
spin_lock(&sbi->cp_lock);
spin_lock_irqsave(&sbi->cp_lock, flags);
if (cpc->reason == CP_UMOUNT)
if ((cpc->reason & CP_UMOUNT) &&
le32_to_cpu(ckpt->cp_pack_total_block_count) >
sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
disable_nat_bits(sbi, false);
if (cpc->reason & CP_TRIMMED)
__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
if (cpc->reason & CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
if (cpc->reason == CP_FASTBOOT)
if (cpc->reason & CP_FASTBOOT)
__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
@@ -1047,15 +1131,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* set this flag to activate crc|cp_ver for recovery */
__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
spin_unlock(&sbi->cp_lock);
spin_unlock_irqrestore(&sbi->cp_lock, flags);
}
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid;
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags;
block_t start_blk;
unsigned int data_sum_blocks, orphan_blocks;
__u32 crc32 = 0;
@@ -1067,19 +1150,16 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
}
next_free_nid(sbi, &last_nid);
/*
* modify checkpoint
* version number is already updated
*/
ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
ckpt->cur_node_segno[i] =
@@ -1098,18 +1178,14 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
}
ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
ckpt->next_free_nid = cpu_to_le32(last_nid);
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi, false);
spin_lock(&sbi->cp_lock);
spin_lock_irqsave(&sbi->cp_lock, flags);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
else
__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
spin_unlock(&sbi->cp_lock);
spin_unlock_irqrestore(&sbi->cp_lock, flags);
orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
@@ -1138,6 +1214,27 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_next_addr(sbi);
/* write nat bits */
if (enabled_nat_bits(sbi, cpc)) {
__u64 cp_ver = cur_cp_version(ckpt);
block_t blk;
cp_ver |= ((__u64)crc32 << 32);
*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++)
update_meta_page(sbi, nm_i->nat_bits +
(i << F2FS_BLKSIZE_BITS), blk + i);
/* Flush all the NAT BITS pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
}
}
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
@@ -1187,7 +1284,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
percpu_counter_set(&sbi->alloc_valid_block_count, 0);
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO);
/* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
@@ -1226,8 +1323,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
goto out;
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1246,10 +1343,10 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
f2fs_flush_merged_bios(sbi);
f2fs_flush_merged_writes(sbi);
/* this is the case of multiple fstrims without any changes */
if (cpc->reason == CP_DISCARD) {
if (cpc->reason & CP_DISCARD) {
if (!exist_trim_candidates(sbi, cpc)) {
unblock_operations(sbi);
goto out;
@@ -1274,7 +1371,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
flush_nat_entries(sbi);
flush_nat_entries(sbi, cpc);
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
@@ -1287,7 +1384,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
if (cpc->reason == CP_RECOVERY)
if (cpc->reason & CP_RECOVERY)
f2fs_msg(sbi->sb, KERN_NOTICE,
"checkpoint: version = %llx", ckpt_ver);

View File

@@ -1,240 +0,0 @@
/*
* linux/fs/f2fs/crypto_key.c
*
* Copied from linux/fs/f2fs/crypto_key.c
*
* Copyright (C) 2015, Google, Inc.
*
* This contains encryption key functions for f2fs
*
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
*/
#include <keys/encrypted-type.h>
#include <keys/user-type.h>
#include <linux/random.h>
#include <linux/scatterlist.h>
#include <uapi/linux/keyctl.h>
#include <crypto/hash.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "xattr.h"
static void derive_crypt_complete(struct crypto_async_request *req, int rc)
{
struct f2fs_completion_result *ecr = req->data;
if (rc == -EINPROGRESS)
return;
ecr->res = rc;
complete(&ecr->completion);
}
/**
* f2fs_derive_key_aes() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivatio.
* @source_key: Source key to which to apply derivation.
* @derived_key: Derived key.
*
* Return: Zero on success; non-zero otherwise.
*/
static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE],
char source_key[F2FS_AES_256_XTS_KEY_SIZE],
char derived_key[F2FS_AES_256_XTS_KEY_SIZE])
{
int res = 0;
struct ablkcipher_request *req = NULL;
DECLARE_F2FS_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0,
0);
if (IS_ERR(tfm)) {
res = PTR_ERR(tfm);
tfm = NULL;
goto out;
}
crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
req = ablkcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
res = -ENOMEM;
goto out;
}
ablkcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
derive_crypt_complete, &ecr);
res = crypto_ablkcipher_setkey(tfm, deriving_key,
F2FS_AES_128_ECB_KEY_SIZE);
if (res < 0)
goto out;
sg_init_one(&src_sg, source_key, F2FS_AES_256_XTS_KEY_SIZE);
sg_init_one(&dst_sg, derived_key, F2FS_AES_256_XTS_KEY_SIZE);
ablkcipher_request_set_crypt(req, &src_sg, &dst_sg,
F2FS_AES_256_XTS_KEY_SIZE, NULL);
res = crypto_ablkcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
out:
if (req)
ablkcipher_request_free(req);
if (tfm)
crypto_free_ablkcipher(tfm);
return res;
}
static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
{
if (!ci)
return;
crypto_free_ablkcipher(ci->ci_ctfm);
kmem_cache_free(f2fs_crypt_info_cachep, ci);
}
void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_crypt_info *prev;
if (ci == NULL)
ci = ACCESS_ONCE(fi->i_crypt_info);
if (ci == NULL)
return;
prev = cmpxchg(&fi->i_crypt_info, ci, NULL);
if (prev != ci)
return;
f2fs_free_crypt_info(ci);
}
int f2fs_get_encryption_info(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_crypt_info *crypt_info;
char full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE +
(F2FS_KEY_DESCRIPTOR_SIZE * 2) + 1];
struct key *keyring_key = NULL;
struct f2fs_encryption_key *master_key;
struct f2fs_encryption_context ctx;
const struct user_key_payload *ukp;
struct crypto_ablkcipher *ctfm;
const char *cipher_str;
char raw_key[F2FS_MAX_KEY_SIZE];
char mode;
int res;
if (fi->i_crypt_info)
return 0;
res = f2fs_crypto_initialize();
if (res)
return res;
res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx), NULL);
if (res < 0)
return res;
else if (res != sizeof(ctx))
return -EINVAL;
res = 0;
crypt_info = kmem_cache_alloc(f2fs_crypt_info_cachep, GFP_NOFS);
if (!crypt_info)
return -ENOMEM;
crypt_info->ci_flags = ctx.flags;
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
if (S_ISREG(inode->i_mode))
mode = crypt_info->ci_data_mode;
else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
mode = crypt_info->ci_filename_mode;
else
BUG();
switch (mode) {
case F2FS_ENCRYPTION_MODE_AES_256_XTS:
cipher_str = "xts(aes)";
break;
case F2FS_ENCRYPTION_MODE_AES_256_CTS:
cipher_str = "cts(cbc(aes))";
break;
default:
printk_once(KERN_WARNING
"f2fs: unsupported key mode %d (ino %u)\n",
mode, (unsigned) inode->i_ino);
res = -ENOKEY;
goto out;
}
memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX,
F2FS_KEY_DESC_PREFIX_SIZE);
sprintf(full_key_descriptor + F2FS_KEY_DESC_PREFIX_SIZE,
"%*phN", F2FS_KEY_DESCRIPTOR_SIZE,
ctx.master_key_descriptor);
full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE +
(2 * F2FS_KEY_DESCRIPTOR_SIZE)] = '\0';
keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
if (IS_ERR(keyring_key)) {
res = PTR_ERR(keyring_key);
keyring_key = NULL;
goto out;
}
BUG_ON(keyring_key->type != &key_type_logon);
ukp = user_key_payload(keyring_key);
if (ukp->datalen != sizeof(struct f2fs_encryption_key)) {
res = -EINVAL;
goto out;
}
master_key = (struct f2fs_encryption_key *)ukp->data;
BUILD_BUG_ON(F2FS_AES_128_ECB_KEY_SIZE !=
F2FS_KEY_DERIVATION_NONCE_SIZE);
BUG_ON(master_key->size != F2FS_AES_256_XTS_KEY_SIZE);
res = f2fs_derive_key_aes(ctx.nonce, master_key->raw,
raw_key);
if (res)
goto out;
ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
printk(KERN_DEBUG
"%s: error %d (inode %u) allocating crypto tfm\n",
__func__, res, (unsigned) inode->i_ino);
goto out;
}
crypt_info->ci_ctfm = ctfm;
crypto_ablkcipher_clear_flags(ctfm, ~0);
crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
CRYPTO_TFM_REQ_WEAK_KEY);
res = crypto_ablkcipher_setkey(ctfm, raw_key,
f2fs_encryption_key_size(mode));
if (res)
goto out;
if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) == NULL)
crypt_info = NULL;
out:
if (res == -ENOKEY && !S_ISREG(inode->i_mode))
res = 0;
key_put(keyring_key);
f2fs_free_crypt_info(crypt_info);
memzero_explicit(raw_key, sizeof(raw_key));
return res;
}
int f2fs_has_encryption_key(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
return (fi->i_crypt_info != NULL);
}

View File

@@ -1,248 +0,0 @@
/*
* copied from linux/fs/ext4/crypto_policy.c
*
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility.
*
* This contains encryption policy functions for f2fs with some modifications
* to support f2fs-specific xattr APIs.
*
* Written by Michael Halcrow, 2015.
* Modified by Jaegeuk Kim, 2015.
*/
#include <linux/random.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "xattr.h"
static int f2fs_inode_has_encryption_context(struct inode *inode)
{
int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0, NULL);
return (res > 0);
}
/*
* check whether the policy is consistent with the encryption context
* for the inode
*/
static int f2fs_is_encryption_context_consistent_with_policy(
struct inode *inode, const struct f2fs_encryption_policy *policy)
{
struct f2fs_encryption_context ctx;
int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
sizeof(ctx), NULL);
if (res != sizeof(ctx))
return 0;
return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(ctx.flags == policy->flags) &&
(ctx.contents_encryption_mode ==
policy->contents_encryption_mode) &&
(ctx.filenames_encryption_mode ==
policy->filenames_encryption_mode));
}
static int f2fs_create_encryption_context_from_policy(
struct inode *inode, const struct f2fs_encryption_policy *policy)
{
struct f2fs_encryption_context ctx;
ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
F2FS_KEY_DESCRIPTOR_SIZE);
if (!f2fs_valid_contents_enc_mode(policy->contents_encryption_mode)) {
printk(KERN_WARNING
"%s: Invalid contents encryption mode %d\n", __func__,
policy->contents_encryption_mode);
return -EINVAL;
}
if (!f2fs_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
printk(KERN_WARNING
"%s: Invalid filenames encryption mode %d\n", __func__,
policy->filenames_encryption_mode);
return -EINVAL;
}
if (policy->flags & ~F2FS_POLICY_FLAGS_VALID)
return -EINVAL;
ctx.contents_encryption_mode = policy->contents_encryption_mode;
ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
ctx.flags = policy->flags;
BUILD_BUG_ON(sizeof(ctx.nonce) != F2FS_KEY_DERIVATION_NONCE_SIZE);
get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE);
return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
sizeof(ctx), NULL, XATTR_CREATE);
}
int f2fs_process_policy(const struct f2fs_encryption_policy *policy,
struct inode *inode)
{
if (!inode_owner_or_capable(inode))
return -EACCES;
if (policy->version != 0)
return -EINVAL;
if (!S_ISDIR(inode->i_mode))
return -EINVAL;
if (!f2fs_inode_has_encryption_context(inode)) {
if (!f2fs_empty_dir(inode))
return -ENOTEMPTY;
return f2fs_create_encryption_context_from_policy(inode,
policy);
}
if (f2fs_is_encryption_context_consistent_with_policy(inode, policy))
return 0;
printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n",
__func__);
return -EINVAL;
}
int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy)
{
struct f2fs_encryption_context ctx;
int res;
if (!f2fs_encrypted_inode(inode))
return -ENODATA;
res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx), NULL);
if (res != sizeof(ctx))
return -ENODATA;
if (ctx.format != F2FS_ENCRYPTION_CONTEXT_FORMAT_V1)
return -EINVAL;
policy->version = 0;
policy->contents_encryption_mode = ctx.contents_encryption_mode;
policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
policy->flags = ctx.flags;
memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
F2FS_KEY_DESCRIPTOR_SIZE);
return 0;
}
int f2fs_is_child_context_consistent_with_parent(struct inode *parent,
struct inode *child)
{
const struct f2fs_crypt_info *parent_ci, *child_ci;
struct f2fs_encryption_context parent_ctx, child_ctx;
int res;
/* No restrictions on file types which are never encrypted */
if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
!S_ISLNK(child->i_mode))
return 1;
/* No restrictions if the parent directory is unencrypted */
if (!f2fs_encrypted_inode(parent))
return 1;
/* Encrypted directories must not contain unencrypted files */
if (!f2fs_encrypted_inode(child))
return 0;
/*
* Both parent and child are encrypted, so verify they use the same
* encryption policy. Compare the fscrypt_info structs if the keys are
* available, otherwise retrieve and compare the fscrypt_contexts.
*
* Note that the fscrypt_context retrieval will be required frequently
* when accessing an encrypted directory tree without the key.
* Performance-wise this is not a big deal because we already don't
* really optimize for file access without the key (to the extent that
* such access is even possible), given that any attempted access
* already causes a fscrypt_context retrieval and keyring search.
*
* In any case, if an unexpected error occurs, fall back to "forbidden".
*/
res = f2fs_get_encryption_info(parent);
if (res)
return 0;
res = f2fs_get_encryption_info(child);
if (res)
return 0;
parent_ci = F2FS_I(parent)->i_crypt_info;
child_ci = F2FS_I(child)->i_crypt_info;
if (parent_ci && child_ci) {
return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
(parent_ci->ci_filename_mode ==
child_ci->ci_filename_mode) &&
(parent_ci->ci_flags == child_ci->ci_flags);
}
res = f2fs_getxattr(parent, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
&parent_ctx, sizeof(parent_ctx), NULL);
if (res != sizeof(parent_ctx))
return 0;
res = f2fs_getxattr(child, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
&child_ctx, sizeof(child_ctx), NULL);
if (res != sizeof(child_ctx))
return 0;
return memcmp(parent_ctx.master_key_descriptor,
child_ctx.master_key_descriptor,
F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(parent_ctx.contents_encryption_mode ==
child_ctx.contents_encryption_mode) &&
(parent_ctx.filenames_encryption_mode ==
child_ctx.filenames_encryption_mode) &&
(parent_ctx.flags == child_ctx.flags);
}
/**
* f2fs_inherit_context() - Sets a child context from its parent
* @parent: Parent inode from which the context is inherited.
* @child: Child inode that inherits the context from @parent.
*
* Return: Zero on success, non-zero otherwise
*/
int f2fs_inherit_context(struct inode *parent, struct inode *child,
struct page *ipage)
{
struct f2fs_encryption_context ctx;
struct f2fs_crypt_info *ci;
int res;
res = f2fs_get_encryption_info(parent);
if (res < 0)
return res;
ci = F2FS_I(parent)->i_crypt_info;
BUG_ON(ci == NULL);
ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
F2FS_KEY_DESCRIPTOR_SIZE);
get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE);
return f2fs_setxattr(child, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
sizeof(ctx), ipage, XATTR_CREATE);
}

File diff suppressed because it is too large Load Diff

View File

@@ -51,9 +51,26 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->aw_cnt = atomic_read(&sbi->aw_cnt);
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
si->nr_flushed =
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
}
if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
si->nr_discarded =
atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
si->nr_discarding =
atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
si->nr_discard_cmd =
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
}
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
@@ -64,6 +81,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->inline_xattr = atomic_read(&sbi->inline_xattr);
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
si->append = sbi->im[APPEND_INO].ino_num;
si->update = sbi->im[UPDATE_INO].ino_num;
si->orphans = sbi->im[ORPHAN_INO].ino_num;
si->utilization = utilization(sbi);
@@ -78,6 +97,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
si->avail_nids = NM_I(sbi)->available_nids;
si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
@@ -91,8 +111,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
si->curseg[i] = curseg->segno;
si->cursec[i] = curseg->segno / sbi->segs_per_sec;
si->curzone[i] = si->cursec[i] / sbi->secs_per_zone;
si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
}
for (i = 0; i < 2; i++) {
@@ -116,10 +136,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
bimodal = 0;
total_vblocks = 0;
blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
blks_per_sec = BLKS_PER_SEC(sbi);
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
vblocks = get_valid_blocks(sbi, segno, true);
dist = abs(vblocks - hblks_per_sec);
bimodal += dist * dist;
@@ -148,7 +168,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
if (si->base_mem)
goto get_cache;
si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
/* build stat */
si->base_mem = sizeof(struct f2fs_stat_info);
/* build superblock */
si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
si->base_mem += 2 * sizeof(struct f2fs_inode_info);
si->base_mem += sizeof(*sbi->ckpt);
si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE;
@@ -185,6 +209,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build nm */
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
si->base_mem += NM_I(sbi)->nat_blocks / 8;
si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
get_cache:
si->cache_mem = 0;
@@ -196,6 +224,11 @@ get_cache:
/* build merge flush thread */
if (SM_I(sbi)->fcc_info)
si->cache_mem += sizeof(struct flush_cmd_control);
if (SM_I(sbi)->dcc_info) {
si->cache_mem += sizeof(struct discard_cmd_control);
si->cache_mem += sizeof(struct discard_cmd) *
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
}
/* free nids */
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
@@ -256,8 +289,8 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
seq_printf(s, " - Orphan Inode: %u\n",
si->orphans);
seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
si->orphans, si->append, si->update);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
@@ -316,10 +349,16 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - IO (CP: %4d, Data: %4d)\n",
si->nr_wb_cp_data, si->nr_wb_data);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), "
"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flushing, si->nr_flushed,
si->nr_discarding, si->nr_discarded,
si->nr_discard_cmd, si->undiscard_blks);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
"volatile IO: %4d (Max. %4d)\n",
si->inmem_pages, si->aw_cnt, si->max_aw_cnt,
si->vw_cnt, si->max_vw_cnt);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
@@ -332,8 +371,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_imeta);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
seq_printf(s, " - free_nids: %9d, alloc_nids: %9d\n",
si->free_nids, si->alloc_nids);
seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n",
si->free_nids, si->avail_nids, si->alloc_nids);
seq_puts(s, "\nDistribution of User Blocks:");
seq_puts(s, " [ valid | invalid | free ]\n");
seq_puts(s, " [");
@@ -419,7 +458,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inplace_count, 0);
atomic_set(&sbi->aw_cnt, 0);
atomic_set(&sbi->vw_cnt, 0);
atomic_set(&sbi->max_aw_cnt, 0);
atomic_set(&sbi->max_vw_cnt, 0);
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);

View File

@@ -94,7 +94,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(NULL, &d, dentry_blk);
de = find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
@@ -111,8 +111,6 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
int max_len = 0;
struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
struct fscrypt_str *name = &fname->disk_name;
if (max_slots)
*max_slots = 0;
@@ -130,17 +128,9 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
continue;
}
/* encrypted case */
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
/* show encrypted name */
if (fname->hash) {
if (de->hash_code == cpu_to_le32(fname->hash))
goto found;
} else if (de_name.len == name->len &&
de->hash_code == namehash &&
!memcmp(de_name.name, name->name, name->len))
if (de->hash_code == namehash &&
fscrypt_match_name(fname, d->filename[bit_pos],
le16_to_cpu(de->name_len)))
goto found;
if (max_slots && max_len > *max_slots)
@@ -170,12 +160,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
struct f2fs_dir_entry *de = NULL;
bool room = false;
int max_slots;
f2fs_hash_t namehash;
if(fname->hash)
namehash = cpu_to_le32(fname->hash);
else
namehash = f2fs_dentry_hash(&name);
f2fs_hash_t namehash = f2fs_dentry_hash(&name, fname);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
@@ -250,6 +235,9 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
break;
}
out:
/* This is to increase the speed of f2fs_create */
if (!de)
F2FS_I(dir)->task = current;
return de;
}
@@ -268,7 +256,10 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
err = fscrypt_setup_filename(dir, child, 1, &fname);
if (err) {
*res_page = ERR_PTR(err);
if (err == -ENOENT)
*res_page = NULL;
else
*res_page = ERR_PTR(err);
return NULL;
}
@@ -330,24 +321,6 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
set_page_dirty(ipage);
}
int update_dent_inode(struct inode *inode, struct inode *to,
const struct qstr *name)
{
struct page *page;
if (file_enc_name(to))
return 0;
page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(page))
return PTR_ERR(page);
init_dent_inode(name, page);
f2fs_put_page(page, 1);
return 0;
}
void do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
@@ -377,7 +350,7 @@ static int make_empty_dir(struct inode *inode,
dentry_blk = kmap_atomic(dentry_page);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(NULL, &d, dentry_blk);
do_make_empty_dir(inode, parent, &d);
kunmap_atomic(dentry_blk);
@@ -431,15 +404,19 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
set_cold_node(inode, page);
}
if (new_name)
if (new_name) {
init_dent_inode(new_name, page);
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
/*
* This file should be checkpointed during fsync.
* We lost i_pino from now on.
*/
if (is_inode_flag_set(inode, FI_INC_LINK)) {
file_lost_pino(inode);
if (!S_ISDIR(inode->i_mode))
file_lost_pino(inode);
/*
* If link the tmpfile to alias through linkat path,
* we should remove this inode from orphan list.
@@ -535,7 +512,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
level = 0;
slots = GET_DENTRY_SLOTS(new_name->len);
dentry_hash = f2fs_dentry_hash(new_name);
dentry_hash = f2fs_dentry_hash(new_name, NULL);
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
@@ -545,8 +522,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
start:
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH))
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
#endif
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
@@ -590,11 +569,9 @@ add_dentry:
err = PTR_ERR(page);
goto fail;
}
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(NULL, &d, dentry_blk);
f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
set_page_dirty(dentry_page);
@@ -643,14 +620,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct fscrypt_name fname;
struct page *page = NULL;
struct f2fs_dir_entry *de = NULL;
int err;
err = fscrypt_setup_filename(dir, name, 0, &fname);
if (err)
return err;
err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
/*
* An immature stakable filesystem shows a race condition between lookup
* and create. If we have same task when doing lookup and create, it's
* definitely fine as expected by VFS normally. Otherwise, let's just
* verify on-disk dentry one more time, which guarantees filesystem
* consistency more.
*/
if (current != F2FS_I(dir)->task) {
de = __f2fs_find_entry(dir, &fname, &page);
F2FS_I(dir)->task = NULL;
}
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
err = -EEXIST;
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
} else {
err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
}
fscrypt_free_filename(&fname);
return err;
}
@@ -708,6 +705,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct f2fs_dentry_block *dentry_blk;
unsigned int bit_pos;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
struct address_space *mapping = page_mapping(page);
unsigned long flags;
int i;
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
@@ -721,7 +720,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
for (i = 0; i < slots; i++)
clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
__clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
/* Let's check and deallocate this dentry page */
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
@@ -738,6 +737,11 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!truncate_hole(dir, page->index, page->index + 1)) {
spin_lock_irqsave(&mapping->tree_lock, flags);
radix_tree_tag_clear(&mapping->page_tree, page_index(page),
PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
ClearPageUptodate(page);
@@ -882,7 +886,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
dentry_blk = kmap(dentry_page);
make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(inode, &d, dentry_blk);
err = f2fs_fill_dentries(ctx, &d,
n * NR_DENTRY_IN_BLOCK, &fstr);

View File

@@ -18,6 +18,179 @@
#include "node.h"
#include <trace/events/f2fs.h>
static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
unsigned int ofs)
{
if (cached_re) {
if (cached_re->ofs <= ofs &&
cached_re->ofs + cached_re->len > ofs) {
return cached_re;
}
}
return NULL;
}
static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
unsigned int ofs)
{
struct rb_node *node = root->rb_node;
struct rb_entry *re;
while (node) {
re = rb_entry(node, struct rb_entry, rb_node);
if (ofs < re->ofs)
node = node->rb_left;
else if (ofs >= re->ofs + re->len)
node = node->rb_right;
else
return re;
}
return NULL;
}
struct rb_entry *__lookup_rb_tree(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs)
{
struct rb_entry *re;
re = __lookup_rb_tree_fast(cached_re, ofs);
if (!re)
return __lookup_rb_tree_slow(root, ofs);
return re;
}
struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root *root, struct rb_node **parent,
unsigned int ofs)
{
struct rb_node **p = &root->rb_node;
struct rb_entry *re;
while (*p) {
*parent = *p;
re = rb_entry(*parent, struct rb_entry, rb_node);
if (ofs < re->ofs)
p = &(*p)->rb_left;
else if (ofs >= re->ofs + re->len)
p = &(*p)->rb_right;
else
f2fs_bug_on(sbi, 1);
}
return p;
}
/*
* lookup rb entry in position of @ofs in rb-tree,
* if hit, return the entry, otherwise, return NULL
* @prev_ex: extent before ofs
* @next_ex: extent after ofs
* @insert_p: insert point for new extent at ofs
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
struct rb_entry *cached_re,
unsigned int ofs,
struct rb_entry **prev_entry,
struct rb_entry **next_entry,
struct rb_node ***insert_p,
struct rb_node **insert_parent,
bool force)
{
struct rb_node **pnode = &root->rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct rb_entry *re = cached_re;
*insert_p = NULL;
*insert_parent = NULL;
*prev_entry = NULL;
*next_entry = NULL;
if (RB_EMPTY_ROOT(root))
return NULL;
if (re) {
if (re->ofs <= ofs && re->ofs + re->len > ofs)
goto lookup_neighbors;
}
while (*pnode) {
parent = *pnode;
re = rb_entry(*pnode, struct rb_entry, rb_node);
if (ofs < re->ofs)
pnode = &(*pnode)->rb_left;
else if (ofs >= re->ofs + re->len)
pnode = &(*pnode)->rb_right;
else
goto lookup_neighbors;
}
*insert_p = pnode;
*insert_parent = parent;
re = rb_entry(parent, struct rb_entry, rb_node);
tmp_node = parent;
if (parent && ofs > re->ofs)
tmp_node = rb_next(parent);
*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
tmp_node = parent;
if (parent && ofs < re->ofs)
tmp_node = rb_prev(parent);
*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
return NULL;
lookup_neighbors:
if (ofs == re->ofs || force) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&re->rb_node);
*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
}
if (ofs == re->ofs + re->len - 1 || force) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&re->rb_node);
*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
}
return re;
}
bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root *root)
{
#ifdef CONFIG_F2FS_CHECK_FS
struct rb_node *cur = rb_first(root), *next;
struct rb_entry *cur_re, *next_re;
if (!cur)
return true;
while (cur) {
next = rb_next(cur);
if (!next)
return true;
cur_re = rb_entry(cur, struct rb_entry, rb_node);
next_re = rb_entry(next, struct rb_entry, rb_node);
if (cur_re->ofs + cur_re->len > next_re->ofs) {
f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, "
"cur(%u, %u) next(%u, %u)",
cur_re->ofs, cur_re->len,
next_re->ofs, next_re->len);
return false;
}
cur = next;
}
#endif
return true;
}
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
@@ -77,7 +250,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
struct extent_tree *et;
nid_t ino = inode->i_ino;
down_write(&sbi->extent_tree_lock);
mutex_lock(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, ino);
if (!et) {
et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
@@ -94,7 +267,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
atomic_dec(&sbi->total_zombie_tree);
list_del_init(&et->list);
}
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
/* never died until evict_inode */
F2FS_I(inode)->extent_tree = et;
@@ -102,36 +275,6 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
return et;
}
static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, unsigned int fofs)
{
struct rb_node *node = et->root.rb_node;
struct extent_node *en = et->cached_en;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
stat_inc_cached_node_hit(sbi);
return en;
}
}
while (node) {
en = rb_entry(node, struct extent_node, rb_node);
if (fofs < en->ei.fofs) {
node = node->rb_left;
} else if (fofs >= en->ei.fofs + en->ei.len) {
node = node->rb_right;
} else {
stat_inc_rbtree_node_hit(sbi);
return en;
}
}
return NULL;
}
static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei)
{
@@ -177,7 +320,7 @@ static void __drop_largest_extent(struct inode *inode,
}
/* return true, if inode page is changed */
bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
@@ -215,6 +358,16 @@ out:
return false;
}
bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
bool ret = __f2fs_init_extent_tree(inode, i_ext);
if (!F2FS_I(inode)->extent_tree)
set_inode_flag(inode, FI_NO_EXTENT);
return ret;
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
@@ -237,17 +390,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
goto out;
}
en = __lookup_extent_tree(sbi, et, pgofs);
if (en) {
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list)) {
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
}
spin_unlock(&sbi->extent_lock);
ret = true;
en = (struct extent_node *)__lookup_rb_tree(&et->root,
(struct rb_entry *)et->cached_en, pgofs);
if (!en)
goto out;
if (en == et->cached_en)
stat_inc_cached_node_hit(sbi);
else
stat_inc_rbtree_node_hit(sbi);
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list)) {
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
}
spin_unlock(&sbi->extent_lock);
ret = true;
out:
stat_inc_total_hit(sbi);
read_unlock(&et->lock);
@@ -256,83 +416,6 @@ out:
return ret;
}
/*
* lookup extent at @fofs, if hit, return the extent
* if not, return NULL and
* @prev_ex: extent before fofs
* @next_ex: extent after fofs
* @insert_p: insert point for new extent at fofs
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
unsigned int fofs,
struct extent_node **prev_ex,
struct extent_node **next_ex,
struct rb_node ***insert_p,
struct rb_node **insert_parent)
{
struct rb_node **pnode = &et->root.rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct extent_node *en = et->cached_en;
*insert_p = NULL;
*insert_parent = NULL;
*prev_ex = NULL;
*next_ex = NULL;
if (RB_EMPTY_ROOT(&et->root))
return NULL;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
goto lookup_neighbors;
}
while (*pnode) {
parent = *pnode;
en = rb_entry(*pnode, struct extent_node, rb_node);
if (fofs < en->ei.fofs)
pnode = &(*pnode)->rb_left;
else if (fofs >= en->ei.fofs + en->ei.len)
pnode = &(*pnode)->rb_right;
else
goto lookup_neighbors;
}
*insert_p = pnode;
*insert_parent = parent;
en = rb_entry(parent, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
return NULL;
lookup_neighbors:
if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&en->rb_node);
*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&en->rb_node);
*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
return en;
}
static struct extent_node *__try_merge_extent_node(struct inode *inode,
struct extent_tree *et, struct extent_info *ei,
struct extent_node *prev_ex,
@@ -387,17 +470,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode,
goto do_insert;
}
while (*p) {
parent = *p;
en = rb_entry(parent, struct extent_node, rb_node);
if (ei->fofs < en->ei.fofs)
p = &(*p)->rb_left;
else if (ei->fofs >= en->ei.fofs + en->ei.len)
p = &(*p)->rb_right;
else
f2fs_bug_on(sbi, 1);
}
p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
do_insert:
en = __attach_extent_node(sbi, et, ei, parent, p);
if (!en)
@@ -413,7 +486,7 @@ do_insert:
return en;
}
static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
static void f2fs_update_extent_tree_range(struct inode *inode,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -426,7 +499,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
unsigned int pos = (unsigned int)fofs;
if (!et)
return false;
return;
trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
@@ -434,7 +507,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
write_unlock(&et->lock);
return false;
return;
}
prev = et->largest;
@@ -447,8 +520,11 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
__drop_largest_extent(inode, fofs, len);
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
&insert_p, &insert_parent);
en = (struct extent_node *)__lookup_rb_tree_ret(&et->root,
(struct rb_entry *)et->cached_en, fofs,
(struct rb_entry **)&prev_en,
(struct rb_entry **)&next_en,
&insert_p, &insert_parent, false);
if (!en)
en = next_en;
@@ -531,8 +607,6 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
__free_extent_tree(sbi, et);
write_unlock(&et->lock);
return !__is_extent_same(&prev, &et->largest);
}
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -548,7 +622,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
if (!atomic_read(&sbi->total_zombie_tree))
goto free_node;
if (!down_write_trylock(&sbi->extent_tree_lock))
if (!mutex_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
@@ -570,11 +644,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
goto unlock_out;
cond_resched();
}
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
free_node:
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
if (!mutex_trylock(&sbi->extent_tree_lock))
goto out;
remained = nr_shrink - (node_cnt + tree_cnt);
@@ -604,7 +678,7 @@ free_node:
spin_unlock(&sbi->extent_lock);
unlock_out:
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
out:
trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
@@ -651,10 +725,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
if (inode->i_nlink && !is_bad_inode(inode) &&
atomic_read(&et->node_cnt)) {
down_write(&sbi->extent_tree_lock);
mutex_lock(&sbi->extent_tree_lock);
list_add_tail(&et->list, &sbi->zombie_list);
atomic_inc(&sbi->total_zombie_tree);
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
return;
}
@@ -662,12 +736,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
node_cnt = f2fs_destroy_extent_node(inode);
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
mutex_lock(&sbi->extent_tree_lock);
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
atomic_dec(&sbi->total_ext_tree);
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
@@ -714,7 +788,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
init_rwsem(&sbi->extent_tree_lock);
mutex_init(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
atomic_set(&sbi->total_ext_tree, 0);

File diff suppressed because it is too large Load Diff

View File

@@ -1,150 +0,0 @@
/*
* linux/fs/f2fs/f2fs_crypto.h
*
* Copied from linux/fs/ext4/ext4_crypto.h
*
* Copyright (C) 2015, Google, Inc.
*
* This contains encryption header content for f2fs
*
* Written by Michael Halcrow, 2015.
* Modified by Jaegeuk Kim, 2015.
*/
#ifndef _F2FS_CRYPTO_H
#define _F2FS_CRYPTO_H
#include <linux/fs.h>
#define F2FS_KEY_DESCRIPTOR_SIZE 8
/* Policy provided via an ioctl on the topmost directory */
struct f2fs_encryption_policy {
char version;
char contents_encryption_mode;
char filenames_encryption_mode;
char flags;
char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE];
} __attribute__((__packed__));
#define F2FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
#define F2FS_KEY_DERIVATION_NONCE_SIZE 16
#define F2FS_POLICY_FLAGS_PAD_4 0x00
#define F2FS_POLICY_FLAGS_PAD_8 0x01
#define F2FS_POLICY_FLAGS_PAD_16 0x02
#define F2FS_POLICY_FLAGS_PAD_32 0x03
#define F2FS_POLICY_FLAGS_PAD_MASK 0x03
#define F2FS_POLICY_FLAGS_VALID 0x03
/**
* Encryption context for inode
*
* Protector format:
* 1 byte: Protector format (1 = this version)
* 1 byte: File contents encryption mode
* 1 byte: File names encryption mode
* 1 byte: Flags
* 8 bytes: Master Key descriptor
* 16 bytes: Encryption Key derivation nonce
*/
struct f2fs_encryption_context {
char format;
char contents_encryption_mode;
char filenames_encryption_mode;
char flags;
char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE];
char nonce[F2FS_KEY_DERIVATION_NONCE_SIZE];
} __attribute__((__packed__));
/* Encryption parameters */
#define F2FS_XTS_TWEAK_SIZE 16
#define F2FS_AES_128_ECB_KEY_SIZE 16
#define F2FS_AES_256_GCM_KEY_SIZE 32
#define F2FS_AES_256_CBC_KEY_SIZE 32
#define F2FS_AES_256_CTS_KEY_SIZE 32
#define F2FS_AES_256_XTS_KEY_SIZE 64
#define F2FS_MAX_KEY_SIZE 64
#define F2FS_KEY_DESC_PREFIX "f2fs:"
#define F2FS_KEY_DESC_PREFIX_SIZE 5
struct f2fs_encryption_key {
__u32 mode;
char raw[F2FS_MAX_KEY_SIZE];
__u32 size;
} __attribute__((__packed__));
struct f2fs_crypt_info {
char ci_data_mode;
char ci_filename_mode;
char ci_flags;
struct crypto_ablkcipher *ci_ctfm;
char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE];
};
#define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define F2FS_WRITE_PATH_FL 0x00000002
struct f2fs_crypto_ctx {
union {
struct {
struct page *bounce_page; /* Ciphertext page */
struct page *control_page; /* Original page */
} w;
struct {
struct bio *bio;
struct work_struct work;
} r;
struct list_head free_list; /* Free list */
};
char flags; /* Flags */
};
struct f2fs_completion_result {
struct completion completion;
int res;
};
#define DECLARE_F2FS_COMPLETION_RESULT(ecr) \
struct f2fs_completion_result ecr = { \
COMPLETION_INITIALIZER((ecr).completion), 0 }
static inline int f2fs_encryption_key_size(int mode)
{
switch (mode) {
case F2FS_ENCRYPTION_MODE_AES_256_XTS:
return F2FS_AES_256_XTS_KEY_SIZE;
case F2FS_ENCRYPTION_MODE_AES_256_GCM:
return F2FS_AES_256_GCM_KEY_SIZE;
case F2FS_ENCRYPTION_MODE_AES_256_CBC:
return F2FS_AES_256_CBC_KEY_SIZE;
case F2FS_ENCRYPTION_MODE_AES_256_CTS:
return F2FS_AES_256_CTS_KEY_SIZE;
default:
BUG();
}
return 0;
}
#define F2FS_FNAME_NUM_SCATTER_ENTRIES 4
#define F2FS_CRYPTO_BLOCK_SIZE 16
#define F2FS_FNAME_CRYPTO_DIGEST_SIZE 32
/**
* For encrypted symlinks, the ciphertext length is stored at the beginning
* of the string in little-endian format.
*/
struct f2fs_encrypted_symlink_data {
__le16 len;
char encrypted_path[1];
} __attribute__((__packed__));
/**
* This function is used to calculate the disk space required to
* store a filename of length l in encrypted symlink format.
*/
static inline u32 encrypted_symlink_data_len(u32 l)
{
return (l + sizeof(struct f2fs_encrypted_symlink_data) - 1);
}
#endif /* _F2FS_CRYPTO_H */

View File

@@ -21,6 +21,7 @@
#include <linux/mount.h>
#include <linux/pagevec.h>
#include <linux/random.h>
#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
@@ -33,6 +34,19 @@
#include "trace.h"
#include <trace/events/f2fs.h>
static int f2fs_filemap_fault(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct inode *inode = file_inode(vma->vm_file);
int err;
down_read(&F2FS_I(inode)->i_mmap_sem);
err = filemap_fault(vma, vmf);
up_read(&F2FS_I(inode)->i_mmap_sem);
return err;
}
static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
@@ -60,13 +74,14 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
f2fs_balance_fs(sbi, dn.node_changed);
file_update_time(vma->vm_file);
down_read(&F2FS_I(inode)->i_mmap_sem);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) ||
!PageUptodate(page))) {
unlock_page(page);
err = -EFAULT;
goto out;
goto out_sem;
}
/*
@@ -86,15 +101,19 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
if (!PageUptodate(page))
SetPageUptodate(page);
f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);
trace_f2fs_vm_page_mkwrite(page, DATA);
mapped:
/* fill the page */
f2fs_wait_on_page_writeback(page, DATA, false);
/* wait for GCed encrypted page writeback */
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
if (f2fs_encrypted_file(inode))
f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr);
out_sem:
up_read(&F2FS_I(inode)->i_mmap_sem);
out:
sb_end_pagefault(inode->i_sb);
f2fs_update_time(sbi, REQ_TIME);
@@ -102,7 +121,7 @@ out:
}
static const struct vm_operations_struct f2fs_file_vm_ops = {
.fault = filemap_fault,
.fault = f2fs_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = f2fs_vm_page_mkwrite,
};
@@ -117,11 +136,6 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
if (!dentry)
return 0;
if (update_dent_inode(inode, inode, &dentry->d_name)) {
dput(dentry);
return 0;
}
*pino = parent_ino(dentry);
dput(dentry);
return 1;
@@ -142,8 +156,6 @@ static inline bool need_do_checkpoint(struct inode *inode)
need_cp = true;
else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
need_cp = true;
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
need_cp = true;
else if (test_opt(sbi, FASTBOOT))
need_cp = true;
else if (sbi->active_logs == 2)
@@ -169,7 +181,6 @@ static void try_to_fix_pino(struct inode *inode)
nid_t pino;
down_write(&fi->i_sem);
fi->xattr_ver = 0;
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
f2fs_i_pino_write(inode, pino);
@@ -268,9 +279,19 @@ sync_nodes:
goto sync_nodes;
}
ret = wait_on_node_pages_writeback(sbi, ino);
if (ret)
goto out;
/*
* If it's atomic_write, it's just fine to keep write ordering. So
* here we don't need to wait for node write completion, since we use
* node chain which serializes node blocks. If one of node writes are
* reordered, we can see simply broken chain, resulting in stopping
* roll-forward recovery. It means we'll recover all or none node blocks
* given fsync mark.
*/
if (!atomic) {
ret = wait_on_node_pages_writeback(sbi, ino);
if (ret)
goto out;
}
/* once recovery info is written, don't need to tack this */
remove_ino_entry(sbi, ino, APPEND_INO);
@@ -278,7 +299,8 @@ sync_nodes:
flush_out:
remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(inode, FI_UPDATE_WRITE);
ret = f2fs_issue_flush(sbi);
if (!atomic)
ret = f2fs_issue_flush(sbi);
f2fs_update_time(sbi, REQ_TIME);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
@@ -375,7 +397,8 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
dn.ofs_in_node++, pgofs++,
data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
block_t blkaddr;
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
if (__found_offset(blkaddr, dirty, pgofs, whence)) {
f2fs_put_dnode(&dn);
@@ -423,14 +446,6 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
struct inode *inode = file_inode(file);
int err;
if (f2fs_encrypted_inode(inode)) {
err = fscrypt_get_encryption_info(inode);
if (err)
return 0;
if (!f2fs_encrypted_inode(inode))
return -ENOKEY;
}
/* we don't need to use inline_data strictly */
err = f2fs_convert_inline_inode(inode);
if (err)
@@ -443,11 +458,10 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
static int f2fs_file_open(struct inode *inode, struct file *filp)
{
int ret = generic_file_open(inode, filp);
struct dentry *dir;
if (!ret && f2fs_encrypted_inode(inode)) {
ret = fscrypt_get_encryption_info(inode);
if (f2fs_encrypted_inode(inode)) {
int ret = fscrypt_get_encryption_info(inode);
if (ret)
return -EACCES;
if (!fscrypt_has_encryption_key(inode))
@@ -460,7 +474,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
return -EPERM;
}
dput(dir);
return ret;
return dquot_file_open(inode, filp);
}
int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
@@ -469,9 +483,13 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
struct f2fs_node *raw_node;
int nr_free = 0, ofs = dn->ofs_in_node, len = count;
__le32 *addr;
int base = 0;
if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
base = get_extra_isize(dn->inode);
raw_node = F2FS_NODE(dn->node_page);
addr = blkaddr_in_node(raw_node) + ofs;
addr = blkaddr_in_node(raw_node) + base + ofs;
for (; count > 0; count--, addr++, dn->ofs_in_node++) {
block_t blkaddr = le32_to_cpu(*addr);
@@ -531,12 +549,14 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
page = get_lock_data_page(inode, index, true);
if (IS_ERR(page))
return 0;
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
f2fs_wait_on_page_writeback(page, DATA, true);
zero_user(page, offset, PAGE_SIZE - offset);
if (!cache_only || !f2fs_encrypted_inode(inode) ||
!S_ISREG(inode->i_mode))
/* An encrypted inode should have a key and truncate the last page. */
f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode));
if (!cache_only)
set_page_dirty(page);
f2fs_put_page(page, 1);
return 0;
@@ -569,10 +589,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
}
if (f2fs_has_inline_data(inode)) {
if (truncate_inline_inode(ipage, from))
set_page_dirty(ipage);
if (from == 0)
clear_inode_flag(inode, FI_DATA_EXIST);
truncate_inline_inode(inode, ipage, from);
f2fs_put_page(ipage, 1);
truncate_page = true;
goto out;
@@ -621,6 +638,12 @@ int f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
return -EIO;
}
#endif
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -642,7 +665,6 @@ int f2fs_getattr(struct vfsmount *mnt,
{
struct inode *inode = d_inode(dentry);
generic_fillattr(inode, stat);
stat->blocks <<= 3;
return 0;
}
@@ -686,14 +708,34 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
return err;
if (is_quota_modification(inode, attr)) {
err = dquot_initialize(inode);
if (err)
return err;
}
if ((attr->ia_valid & ATTR_UID &&
!uid_eq(attr->ia_uid, inode->i_uid)) ||
(attr->ia_valid & ATTR_GID &&
!gid_eq(attr->ia_gid, inode->i_gid))) {
err = dquot_transfer(inode, attr);
if (err)
return err;
}
if (attr->ia_valid & ATTR_SIZE) {
if (f2fs_encrypted_inode(inode) &&
fscrypt_get_encryption_info(inode))
return -EACCES;
if (f2fs_encrypted_inode(inode)) {
err = fscrypt_get_encryption_info(inode);
if (err)
return err;
if (!fscrypt_has_encryption_key(inode))
return -ENOKEY;
}
if (attr->ia_size <= i_size_read(inode)) {
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size);
err = f2fs_truncate(inode);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (err)
return err;
} else {
@@ -701,7 +743,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size);
up_write(&F2FS_I(inode)->i_mmap_sem);
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
@@ -847,12 +891,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
f2fs_lock_op(sbi);
ret = truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
up_write(&F2FS_I(inode)->i_mmap_sem);
}
}
@@ -883,7 +929,8 @@ next_dnode:
done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
dn.ofs_in_node, len);
for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
*blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
*blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
if (!is_checkpointed_data(sbi, *blkaddr)) {
if (test_opt(sbi, LFS)) {
@@ -959,15 +1006,15 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
ADDRS_PER_PAGE(dn.node_page, dst_inode) -
dn.ofs_in_node, len - i);
do {
dn.data_blkaddr = datablock_addr(dn.node_page,
dn.ofs_in_node);
dn.data_blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
truncate_data_blocks_range(&dn, 1);
if (do_replace[i]) {
f2fs_i_blocks_write(src_inode,
1, false);
1, false, false);
f2fs_i_blocks_write(dst_inode,
1, true);
1, true, false);
f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
blkaddr[i], ni.version, true, false);
@@ -1019,11 +1066,11 @@ static int __exchange_data_block(struct inode *src_inode,
while (len) {
olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
if (!src_blkaddr)
return -ENOMEM;
do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL);
do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL);
if (!do_replace) {
kvfree(src_blkaddr);
return -ENOMEM;
@@ -1091,16 +1138,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
return ret;
goto out;
truncate_pagecache(inode, offset);
ret = f2fs_do_collapse(inode, pg_start, pg_end);
if (ret)
return ret;
goto out;
/* write out all moved pages, if possible */
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1113,6 +1161,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (!ret)
f2fs_i_size_write(inode, new_size);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
}
@@ -1126,7 +1176,8 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
int ret;
for (; index < end; index++, dn->ofs_in_node++) {
if (datablock_addr(dn->node_page, dn->ofs_in_node) == NULL_ADDR)
if (datablock_addr(dn->inode, dn->node_page,
dn->ofs_in_node) == NULL_ADDR)
count++;
}
@@ -1137,8 +1188,8 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
dn->ofs_in_node = ofs_in_node;
for (index = start; index < end; index++, dn->ofs_in_node++) {
dn->data_blkaddr =
datablock_addr(dn->node_page, dn->ofs_in_node);
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
/*
* reserve_new_blocks will not guarantee entire block
* allocation.
@@ -1177,9 +1228,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
return ret;
goto out_sem;
truncate_pagecache_range(inode, offset, offset + len - 1);
@@ -1193,17 +1245,15 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
return ret;
goto out_sem;
if (offset + len > new_size)
new_size = offset + len;
new_size = max_t(loff_t, new_size, offset + len);
} else {
if (off_start) {
ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start);
if (ret)
return ret;
goto out_sem;
new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT);
@@ -1252,6 +1302,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
out:
if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
f2fs_i_size_write(inode, new_size);
out_sem:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
}
@@ -1264,8 +1316,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
int ret = 0;
new_size = i_size_read(inode) + len;
if (new_size > inode->i_sb->s_maxbytes)
return -EFBIG;
ret = inode_newsize_ok(inode, new_size);
if (ret)
return ret;
if (offset >= i_size_read(inode))
return -EINVAL;
@@ -1280,14 +1333,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = truncate_blocks(inode, i_size_read(inode), true);
if (ret)
return ret;
goto out;
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
return ret;
goto out;
truncate_pagecache(inode, offset);
@@ -1316,6 +1370,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (!ret)
f2fs_i_size_write(inode, new_size);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
}
@@ -1435,6 +1491,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp)
drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(inode, FI_VOLATILE_FILE);
stat_dec_volatile_write(inode);
set_inode_flag(inode, FI_DROP_CACHE);
filemap_fdatawrite(inode->i_mapping);
clear_inode_flag(inode, FI_DROP_CACHE);
@@ -1442,17 +1499,20 @@ static int f2fs_release_file(struct inode *inode, struct file *filp)
return 0;
}
#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
static int f2fs_file_flush(struct file *file, fl_owner_t id)
{
if (S_ISDIR(mode))
return flags;
else if (S_ISREG(mode))
return flags & F2FS_REG_FLMASK;
else
return flags & F2FS_OTHER_FLMASK;
struct inode *inode = file_inode(file);
/*
* If the process doing a transaction is crashed, we should do
* roll-back. Otherwise, other reader/write can see corrupted database
* until all the writers close its file. Since this should be done
* before dropping file lock, it needs to do in ->flush.
*/
if (f2fs_is_atomic_file(inode) &&
F2FS_I(inode)->inmem_task == current)
drop_inmem_pages(inode);
return 0;
}
static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
@@ -1481,28 +1541,34 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
if (ret)
return ret;
flags = f2fs_mask_flags(inode->i_mode, flags);
inode_lock(inode);
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode)) {
ret = -EPERM;
goto unlock_out;
}
flags = f2fs_mask_flags(inode->i_mode, flags);
oldflags = fi->i_flags;
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
inode_unlock(inode);
ret = -EPERM;
goto out;
goto unlock_out;
}
}
flags = flags & FS_FL_USER_MODIFIABLE;
flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
fi->i_flags = flags;
inode_unlock(inode);
inode->i_ctime = current_time(inode);
f2fs_set_inode_flags(inode);
out:
f2fs_mark_inode_dirty_sync(inode, false);
unlock_out:
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
}
@@ -1522,6 +1588,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -1536,20 +1605,27 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
goto out;
set_inode_flag(inode, FI_ATOMIC_FILE);
set_inode_flag(inode, FI_HOT_DATA);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
if (!get_dirty_pages(inode))
goto out;
goto inc_stat;
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret)
if (ret) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
out:
clear_inode_flag(inode, FI_HOT_DATA);
goto out;
}
inc_stat:
F2FS_I(inode)->inmem_task = current;
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
out:
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1580,10 +1656,11 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
if (!ret) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_HOT_DATA);
stat_dec_atomic_write(inode);
}
} else {
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
err_out:
inode_unlock(inode);
@@ -1599,6 +1676,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -1612,6 +1692,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
if (ret)
goto out;
stat_inc_volatile_write(inode);
stat_update_max_volatile_write(inode);
set_inode_flag(inode, FI_VOLATILE_FILE);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
out:
@@ -1667,6 +1750,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(inode, FI_VOLATILE_FILE);
stat_dec_volatile_write(inode);
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
@@ -1712,7 +1796,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
f2fs_stop_checkpoint(sbi, false);
break;
case F2FS_GOING_DOWN_METAFLUSH:
sync_meta_pages(sbi, META, LONG_MAX);
sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false);
break;
default:
@@ -1773,31 +1857,16 @@ static bool uuid_is_nonzero(__u8 u[16])
static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
sizeof(policy)))
return -EFAULT;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return fscrypt_process_policy(filp, &policy);
return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
}
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
int err;
err = fscrypt_get_policy(inode, &policy);
if (err)
return err;
if (copy_to_user((struct fscrypt_policy __user *)arg, &policy, sizeof(policy)))
return -EFAULT;
return 0;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
@@ -1863,7 +1932,51 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
mutex_lock(&sbi->gc_mutex);
}
ret = f2fs_gc(sbi, sync, true);
ret = f2fs_gc(sbi, sync, true, NULL_SEGNO);
out:
mnt_drop_write_file(filp);
return ret;
}
static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_gc_range range;
u64 end;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
sizeof(range)))
return -EFAULT;
if (f2fs_readonly(sbi->sb))
return -EROFS;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
end = range.start + range.len;
if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi))
return -EINVAL;
do_more:
if (!range.sync) {
if (!mutex_trylock(&sbi->gc_mutex)) {
ret = -EBUSY;
goto out;
}
} else {
mutex_lock(&sbi->gc_mutex);
}
ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
range.start += sbi->blocks_per_seg;
if (range.start <= end)
goto do_more;
out:
mnt_drop_write_file(filp);
return ret;
@@ -1897,17 +2010,16 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
struct extent_info ei;
struct extent_info ei = {0,0,0};
pgoff_t pg_start, pg_end;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg;
block_t blk_end = 0;
bool fragmented = false;
int err;
/* if in-place-update policy is enabled, don't waste time here */
if (need_inplace_update(inode))
if (need_inplace_update_policy(inode, NULL))
return -EINVAL;
pg_start = range->start >> PAGE_SHIFT;
@@ -1941,7 +2053,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
*/
while (map.m_lblk < pg_end) {
map.m_len = pg_end - map.m_lblk;
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
if (err)
goto out;
@@ -1965,7 +2077,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
map.m_lblk = pg_start;
map.m_len = pg_end - pg_start;
sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec;
sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
/*
* make sure there are enough free section for LFS allocation, this can
@@ -1983,7 +2095,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
do_map:
map.m_len = pg_end - map.m_lblk;
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
if (err)
goto clear_out;
@@ -2042,42 +2154,40 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!S_ISREG(inode->i_mode))
if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode))
return -EINVAL;
if (f2fs_readonly(sbi->sb))
return -EROFS;
if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
sizeof(range)))
return -EFAULT;
/* verify alignment of offset & size */
if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
return -EINVAL;
if (unlikely((range.start + range.len) >> PAGE_SHIFT >
sbi->max_file_blocks))
return -EINVAL;
err = mnt_want_write_file(filp);
if (err)
return err;
if (f2fs_readonly(sbi->sb)) {
err = -EROFS;
goto out;
}
if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
sizeof(range))) {
err = -EFAULT;
goto out;
}
/* verify alignment of offset & size */
if (range.start & (F2FS_BLKSIZE - 1) ||
range.len & (F2FS_BLKSIZE - 1)) {
err = -EINVAL;
goto out;
}
err = f2fs_defragment_range(sbi, filp, &range);
mnt_drop_write_file(filp);
f2fs_update_time(sbi, REQ_TIME);
if (err < 0)
goto out;
return err;
if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
sizeof(range)))
err = -EFAULT;
out:
mnt_drop_write_file(filp);
return err;
return -EFAULT;
return 0;
}
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
@@ -2211,6 +2321,8 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
range.pos_out, range.len);
mnt_drop_write_file(filp);
if (err)
goto err_out;
if (copy_to_user((struct f2fs_move_range __user *)arg,
&range, sizeof(range)))
@@ -2220,6 +2332,79 @@ err_out:
return err;
}
static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct sit_info *sm = SIT_I(sbi);
unsigned int start_segno = 0, end_segno = 0;
unsigned int dev_start_segno = 0, dev_end_segno = 0;
struct f2fs_flush_device range;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (f2fs_readonly(sbi->sb))
return -EROFS;
if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
sizeof(range)))
return -EFAULT;
if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
sbi->segs_per_sec != 1) {
f2fs_msg(sbi->sb, KERN_WARNING,
"Can't flush %u in %d for segs_per_sec %u != 1\n",
range.dev_num, sbi->s_ndevs,
sbi->segs_per_sec);
return -EINVAL;
}
ret = mnt_want_write_file(filp);
if (ret)
return ret;
if (range.dev_num != 0)
dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
start_segno = sm->last_victim[FLUSH_DEVICE];
if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
start_segno = dev_start_segno;
end_segno = min(start_segno + range.segments, dev_end_segno);
while (start_segno < end_segno) {
if (!mutex_trylock(&sbi->gc_mutex)) {
ret = -EBUSY;
goto out;
}
sm->last_victim[GC_CB] = end_segno + 1;
sm->last_victim[GC_GREEDY] = end_segno + 1;
sm->last_victim[ALLOC_NEXT] = end_segno + 1;
ret = f2fs_gc(sbi, true, true, start_segno);
if (ret == -EAGAIN)
ret = 0;
else if (ret < 0)
break;
start_segno++;
}
out:
mnt_drop_write_file(filp);
return ret;
}
static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
/* Must validate to set it with SQLite behavior in Android. */
sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;
return put_user(sb_feature, (u32 __user *)arg);
}
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -2251,12 +2436,18 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_get_encryption_pwsalt(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT:
return f2fs_ioc_gc(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
return f2fs_ioc_gc_range(filp, arg);
case F2FS_IOC_WRITE_CHECKPOINT:
return f2fs_ioc_write_checkpoint(filp, arg);
case F2FS_IOC_DEFRAGMENT:
return f2fs_ioc_defragment(filp, arg);
case F2FS_IOC_MOVE_RANGE:
return f2fs_ioc_move_range(filp, arg);
case F2FS_IOC_FLUSH_DEVICE:
return f2fs_ioc_flush_device(filp, arg);
case F2FS_IOC_GET_FEATURES:
return f2fs_ioc_get_features(filp, arg);
default:
return -ENOTTY;
}
@@ -2269,16 +2460,15 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct blk_plug plug;
ssize_t ret;
if (f2fs_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode) &&
fscrypt_get_encryption_info(inode))
return -EACCES;
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
int err = f2fs_preallocate_blocks(iocb, from);
int err;
if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
set_inode_flag(inode, FI_NO_PREALLOC);
err = f2fs_preallocate_blocks(iocb, from);
if (err) {
inode_unlock(inode);
return err;
@@ -2286,6 +2476,10 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
blk_finish_plug(&plug);
clear_inode_flag(inode, FI_NO_PREALLOC);
if (ret > 0)
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
}
inode_unlock(inode);
@@ -2322,10 +2516,12 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
case F2FS_IOC_GET_ENCRYPTION_POLICY:
case F2FS_IOC_GARBAGE_COLLECT:
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
case F2FS_IOC_WRITE_CHECKPOINT:
case F2FS_IOC_DEFRAGMENT:
break;
case F2FS_IOC_MOVE_RANGE:
case F2FS_IOC_FLUSH_DEVICE:
case F2FS_IOC_GET_FEATURES:
break;
default:
return -ENOIOCTLCMD;
@@ -2341,6 +2537,7 @@ const struct file_operations f2fs_file_operations = {
.open = f2fs_file_open,
.release = f2fs_release_file,
.mmap = f2fs_file_mmap,
.flush = f2fs_file_flush,
.fsync = f2fs_sync_file,
.fallocate = f2fs_fallocate,
.unlocked_ioctl = f2fs_ioctl,

View File

@@ -28,17 +28,23 @@ static int gc_thread_func(void *data)
struct f2fs_sb_info *sbi = data;
struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
long wait_ms;
unsigned int wait_ms;
wait_ms = gc_th->min_sleep_time;
set_freezable();
do {
wait_event_interruptible_timeout(*wq,
kthread_should_stop() || freezing(current) ||
gc_th->gc_wake,
msecs_to_jiffies(wait_ms));
/* give it a try one time */
if (gc_th->gc_wake)
gc_th->gc_wake = 0;
if (try_to_freeze())
continue;
else
wait_event_interruptible_timeout(*wq,
kthread_should_stop(),
msecs_to_jiffies(wait_ms));
if (kthread_should_stop())
break;
@@ -48,10 +54,15 @@ static int gc_thread_func(void *data)
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT))
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
#endif
if (!sb_start_write_trylock(sbi->sb))
continue;
/*
* [GC triggering condition]
* 0. GC is not conducted currently.
@@ -66,23 +77,28 @@ static int gc_thread_func(void *data)
* So, I'd like to wait some time to collect dirty segments.
*/
if (!mutex_trylock(&sbi->gc_mutex))
continue;
goto next;
if (gc_th->gc_urgent) {
wait_ms = gc_th->urgent_sleep_time;
goto do_gc;
}
if (!is_idle(sbi)) {
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
continue;
goto next;
}
if (has_enough_invalid_blocks(sbi))
decrease_sleep_time(gc_th, &wait_ms);
else
increase_sleep_time(gc_th, &wait_ms);
do_gc:
stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true))
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO))
wait_ms = gc_th->no_gc_sleep_time;
trace_f2fs_background_gc(sbi->sb, wait_ms,
@@ -90,6 +106,8 @@ static int gc_thread_func(void *data)
/* balancing f2fs's metadata periodically */
f2fs_balance_fs_bg(sbi);
next:
sb_end_write(sbi->sb);
} while (!kthread_should_stop());
return 0;
@@ -107,11 +125,14 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
goto out;
}
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
gc_th->gc_idle = 0;
gc_th->gc_urgent = 0;
gc_th->gc_wake= 0;
sbi->gc_thread = gc_th;
init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
@@ -166,10 +187,15 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->ofs_unit = sbi->segs_per_sec;
}
if (p->max_search > sbi->max_victim_search)
/* we need to check every dirty segments in the FG_GC case */
if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
p->offset = sbi->last_victim[p->gc_mode];
/* let's select beginning hot/small space first */
if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
p->offset = 0;
else
p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
}
static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
@@ -179,7 +205,7 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
if (p->alloc_mode == SSR)
return sbi->blocks_per_seg;
if (p->gc_mode == GC_GREEDY)
return sbi->blocks_per_seg * p->ofs_unit;
return 2 * sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
else /* No other gc_mode */
@@ -199,8 +225,12 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
if (sec_usage_check(sbi, secno))
continue;
if (no_fggc_candidate(sbi, secno))
continue;
clear_bit(secno, dirty_i->victim_secmap);
return secno * sbi->segs_per_sec;
return GET_SEG_FROM_SEC(sbi, secno);
}
return NULL_SEGNO;
}
@@ -208,8 +238,8 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct sit_info *sit_i = SIT_I(sbi);
unsigned int secno = GET_SECNO(sbi, segno);
unsigned int start = secno * sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
unsigned long long mtime = 0;
unsigned int vblocks;
unsigned char age = 0;
@@ -218,7 +248,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
for (i = 0; i < sbi->segs_per_sec; i++)
mtime += get_seg_entry(sbi, start + i)->mtime;
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
vblocks = get_valid_blocks(sbi, segno, true);
mtime = div_u64(mtime, sbi->segs_per_sec);
vblocks = div_u64(vblocks, sbi->segs_per_sec);
@@ -237,6 +267,16 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
unsigned int segno)
{
unsigned int valid_blocks =
get_valid_blocks(sbi, segno, true);
return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
valid_blocks * 2 : valid_blocks;
}
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
unsigned int segno, struct victim_sel_policy *p)
{
@@ -245,7 +285,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
return get_greedy_cost(sbi, segno);
else
return get_cb_cost(sbi, segno);
}
@@ -274,6 +314,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
unsigned int *result, int gc_type, int type, char alloc_mode)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
unsigned int last_segment = MAIN_SEGS(sbi);
@@ -287,10 +328,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_segno = NULL_SEGNO;
p.min_cost = get_max_cost(sbi, &p);
if (*result != NULL_SEGNO) {
if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
}
if (p.max_search == 0)
goto out;
last_victim = sbi->last_victim[p.gc_mode];
last_victim = sm->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
if (p.min_segno != NULL_SEGNO)
@@ -303,9 +352,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
if (segno >= last_segment) {
if (sbi->last_victim[p.gc_mode]) {
last_segment = sbi->last_victim[p.gc_mode];
sbi->last_victim[p.gc_mode] = 0;
if (sm->last_victim[p.gc_mode]) {
last_segment =
sm->last_victim[p.gc_mode];
sm->last_victim[p.gc_mode] = 0;
p.offset = 0;
continue;
}
@@ -322,13 +372,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
secno = GET_SECNO(sbi, segno);
secno = GET_SEC_FROM_SEG(sbi, segno);
if (sec_usage_check(sbi, secno))
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
if (gc_type == FG_GC && p.alloc_mode == LFS &&
no_fggc_candidate(sbi, secno))
goto next;
cost = get_gc_cost(sbi, segno, &p);
@@ -338,17 +390,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
next:
if (nsearched >= p.max_search) {
if (!sbi->last_victim[p.gc_mode] && segno <= last_victim)
sbi->last_victim[p.gc_mode] = last_victim + 1;
if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
sm->last_victim[p.gc_mode] = last_victim + 1;
else
sbi->last_victim[p.gc_mode] = segno + 1;
sm->last_victim[p.gc_mode] = segno + 1;
sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
break;
}
}
if (p.min_segno != NULL_SEGNO) {
got_it:
if (p.alloc_mode == LFS) {
secno = GET_SECNO(sbi, p.min_segno);
secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
if (gc_type == FG_GC)
sbi->cur_victim_sec = secno;
else
@@ -531,12 +584,14 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
get_node_info(sbi, nid, dni);
if (sum->version != dni->version) {
f2fs_put_page(node_page, 1);
return false;
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: valid data with mismatched node version.",
__func__);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
*nofs = ofs_of_node(node_page);
source_blkaddr = datablock_addr(node_page, ofs_in_node);
source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
if (source_blkaddr != blkaddr)
@@ -544,15 +599,21 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
static void move_encrypted_block(struct inode *inode, block_t bidx,
unsigned int segno, int off)
/*
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
*/
static void move_data_block(struct inode *inode, block_t bidx,
unsigned int segno, int off)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.type = DATA,
.temp = COLD,
.op = REQ_OP_READ,
.op_flags = REQ_SYNC,
.encrypted_page = NULL,
.in_list = false,
};
struct dnode_of_data dn;
struct f2fs_summary sum;
@@ -596,7 +657,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
&sum, CURSEG_COLD_DATA);
&sum, CURSEG_COLD_DATA, NULL, false);
fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -634,7 +695,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
fio.op = REQ_OP_WRITE;
fio.op_flags = REQ_SYNC | REQ_NOIDLE;
fio.new_blkaddr = newaddr;
f2fs_submit_page_mbio(&fio);
f2fs_submit_page_write(&fio);
f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE);
f2fs_update_data_blkaddr(&dn, newaddr);
set_inode_flag(inode, FI_APPEND_WRITE);
@@ -676,10 +739,14 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.type = DATA,
.temp = COLD,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_NOIDLE,
.op_flags = REQ_SYNC,
.old_blkaddr = NULL_ADDR,
.page = page,
.encrypted_page = NULL,
.need_lock = LOCK_REQ,
.io_type = FS_GC_DATA_IO,
};
bool is_dirty = PageDirty(page);
int err;
@@ -768,8 +835,7 @@ next_step:
continue;
/* if encrypted inode, let's go phase 3 */
if (f2fs_encrypted_inode(inode) &&
S_ISREG(inode->i_mode)) {
if (f2fs_encrypted_file(inode)) {
add_gc_inode(gc_list, inode);
continue;
}
@@ -803,14 +869,18 @@ next_step:
continue;
}
locked = true;
/* wait for all inflight aio data */
inode_dio_wait(inode);
}
start_bidx = start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
move_encrypted_block(inode, start_bidx, segno, off);
if (f2fs_encrypted_file(inode))
move_data_block(inode, start_bidx, segno, off);
else
move_data_page(inode, start_bidx, gc_type, segno, off);
move_data_page(inode, start_bidx, gc_type,
segno, off);
if (locked) {
up_write(&fi->dio_rwsem[WRITE]);
@@ -847,7 +917,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct blk_plug plug;
unsigned int segno = start_segno;
unsigned int end_segno = start_segno + sbi->segs_per_sec;
int sec_freed = 0;
int seg_freed = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
@@ -871,7 +941,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
GET_SUM_BLOCK(sbi, segno));
f2fs_put_page(sum_page, 0);
if (get_valid_blocks(sbi, segno, 1) == 0 ||
if (get_valid_blocks(sbi, segno, false) == 0 ||
!PageUptodate(sum_page) ||
unlikely(f2fs_cp_error(sbi)))
goto next;
@@ -886,7 +956,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
* - mutex_lock(sentry_lock) - change_curseg()
* - lock_page(sum_page)
*/
if (type == SUM_TYPE_NODE)
gc_node_segment(sbi, sum->entries, segno, gc_type);
else
@@ -894,90 +963,113 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
gc_type);
stat_inc_seg_count(sbi, type, gc_type);
if (gc_type == FG_GC &&
get_valid_blocks(sbi, segno, false) == 0)
seg_freed++;
next:
f2fs_put_page(sum_page, 0);
}
if (gc_type == FG_GC)
f2fs_submit_merged_bio(sbi,
(type == SUM_TYPE_NODE) ? NODE : DATA, WRITE);
f2fs_submit_merged_write(sbi,
(type == SUM_TYPE_NODE) ? NODE : DATA);
blk_finish_plug(&plug);
if (gc_type == FG_GC &&
get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0)
sec_freed = 1;
stat_inc_call_count(sbi->stat_info);
return sec_freed;
return seg_freed;
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
bool background, unsigned int segno)
{
unsigned int segno;
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0;
int ret = -EINVAL;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
int ret = 0;
struct cp_control cpc;
unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(GFP_NOFS),
};
trace_f2fs_gc_begin(sbi->sb, sync, background,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
free_sections(sbi),
free_segments(sbi),
reserved_segments(sbi),
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
gc_more:
segno = NULL_SEGNO;
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
ret = -EINVAL;
goto stop;
}
if (unlikely(f2fs_cp_error(sbi))) {
ret = -EIO;
goto stop;
}
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) {
gc_type = FG_GC;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
/*
* If there is no victim and no prefree segment but still not
* enough free sections, we should flush dent/node blocks and do
* garbage collections.
* For example, if there are many prefree_segments below given
* threshold, we can make them free by checkpoint. Then, we
* secure free segments which doesn't need fggc any more.
*/
if (__get_victim(sbi, &segno, gc_type) ||
prefree_segments(sbi)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
segno = NULL_SEGNO;
} else if (has_not_enough_free_secs(sbi, 0, 0)) {
if (prefree_segments(sbi)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
}
} else if (gc_type == BG_GC && !background) {
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (has_not_enough_free_secs(sbi, 0, 0))
gc_type = FG_GC;
}
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (gc_type == BG_GC && !background) {
ret = -EINVAL;
goto stop;
}
if (!__get_victim(sbi, &segno, gc_type)) {
ret = -ENODATA;
goto stop;
}
if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
goto stop;
ret = 0;
if (do_garbage_collect(sbi, segno, &gc_list, gc_type) &&
gc_type == FG_GC)
seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
sec_freed++;
total_freed += seg_freed;
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
if (!sync) {
if (has_not_enough_free_secs(sbi, sec_freed, 0))
if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
segno = NULL_SEGNO;
goto gc_more;
}
if (gc_type == FG_GC)
ret = write_checkpoint(sbi, &cpc);
}
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
free_sections(sbi),
free_segments(sbi),
reserved_segments(sbi),
prefree_segments(sbi));
mutex_unlock(&sbi->gc_mutex);
put_gc_inode(&gc_list);
@@ -989,5 +1081,20 @@ stop:
void build_gc_manager(struct f2fs_sb_info *sbi)
{
u64 main_count, resv_count, ovp_count;
DIRTY_I(sbi)->v_ops = &default_v_ops;
/* threshold of # of valid blocks in a section for victims of FG_GC */
main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
sbi->fggc_threshold = div64_u64((main_count - ovp_count) *
BLKS_PER_SEC(sbi), (main_count - resv_count));
/* give warm/cold data area from slower device */
if (sbi->s_ndevs && sbi->segs_per_sec == 1)
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}

View File

@@ -13,6 +13,7 @@
* whether IO subsystem is idle
* or not
*/
#define DEF_GC_THREAD_URGENT_SLEEP_TIME 500 /* 500 ms */
#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
@@ -27,12 +28,15 @@ struct f2fs_gc_kthread {
wait_queue_head_t gc_wait_queue_head;
/* for gc sleep time */
unsigned int urgent_sleep_time;
unsigned int min_sleep_time;
unsigned int max_sleep_time;
unsigned int no_gc_sleep_time;
/* for changing gc mode */
unsigned int gc_idle;
unsigned int gc_urgent;
unsigned int gc_wake;
};
struct gc_inode_list {
@@ -65,25 +69,32 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
}
static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th,
long *wait)
unsigned int *wait)
{
unsigned int min_time = gc_th->min_sleep_time;
unsigned int max_time = gc_th->max_sleep_time;
if (*wait == gc_th->no_gc_sleep_time)
return;
*wait += gc_th->min_sleep_time;
if (*wait > gc_th->max_sleep_time)
*wait = gc_th->max_sleep_time;
if ((long long)*wait + (long long)min_time > (long long)max_time)
*wait = max_time;
else
*wait += min_time;
}
static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
long *wait)
unsigned int *wait)
{
unsigned int min_time = gc_th->min_sleep_time;
if (*wait == gc_th->no_gc_sleep_time)
*wait = gc_th->max_sleep_time;
*wait -= gc_th->min_sleep_time;
if (*wait <= gc_th->min_sleep_time)
*wait = gc_th->min_sleep_time;
if ((long long)*wait - (long long)min_time < (long long)min_time)
*wait = min_time;
else
*wait -= min_time;
}
static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)

View File

@@ -70,7 +70,8 @@ static void str2hashbuf(const unsigned char *msg, size_t len,
*buf++ = pad;
}
f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
struct fscrypt_name *fname)
{
__u32 hash;
f2fs_hash_t f2fs_hash;
@@ -79,6 +80,10 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
const unsigned char *name = name_info->name;
size_t len = name_info->len;
/* encrypted bigname case */
if (fname && !fname->disk_name.name)
return cpu_to_le32(fname->hash);
if (is_dot_dotdot(name_info))
return 0;

View File

@@ -23,10 +23,10 @@ bool f2fs_may_inline_data(struct inode *inode)
if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
return false;
if (i_size_read(inode) > MAX_INLINE_DATA)
if (i_size_read(inode) > MAX_INLINE_DATA(inode))
return false;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
if (f2fs_encrypted_file(inode))
return false;
return true;
@@ -45,6 +45,7 @@ bool f2fs_may_inline_dentry(struct inode *inode)
void read_inline_data(struct page *page, struct page *ipage)
{
struct inode *inode = page->mapping->host;
void *src_addr, *dst_addr;
if (PageUptodate(page))
@@ -52,31 +53,33 @@ void read_inline_data(struct page *page, struct page *ipage)
f2fs_bug_on(F2FS_P_SB(page), page->index);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE);
/* Copy the whole inline data block */
src_addr = inline_data_addr(ipage);
src_addr = inline_data_addr(inode, ipage);
dst_addr = kmap_atomic(page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
flush_dcache_page(page);
kunmap_atomic(dst_addr);
if (!PageUptodate(page))
SetPageUptodate(page);
}
bool truncate_inline_inode(struct page *ipage, u64 from)
void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from)
{
void *addr;
if (from >= MAX_INLINE_DATA)
return false;
if (from >= MAX_INLINE_DATA(inode))
return;
addr = inline_data_addr(ipage);
addr = inline_data_addr(inode, ipage);
f2fs_wait_on_page_writeback(ipage, NODE, true);
memset(addr + from, 0, MAX_INLINE_DATA - from);
memset(addr + from, 0, MAX_INLINE_DATA(inode) - from);
set_page_dirty(ipage);
return true;
if (from == 0)
clear_inode_flag(inode, FI_DATA_EXIST);
}
int f2fs_read_inline_data(struct inode *inode, struct page *page)
@@ -132,6 +135,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
.op_flags = REQ_SYNC | REQ_NOIDLE | REQ_PRIO,
.page = page,
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
int dirty, err;
@@ -153,6 +157,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
/* write data page to try to make data consistent */
set_page_writeback(page);
fio.old_blkaddr = dn->data_blkaddr;
set_inode_flag(dn->inode, FI_HOT_DATA);
write_data_page(dn, &fio);
f2fs_wait_on_page_writeback(page, DATA, true);
if (dirty) {
@@ -164,11 +169,11 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
set_inode_flag(dn->inode, FI_APPEND_WRITE);
/* clear inline data and flag after data writeback */
truncate_inline_inode(dn->inode_page, 0);
truncate_inline_inode(dn->inode, dn->inode_page, 0);
clear_inline_node(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
f2fs_clear_inline_inode(dn->inode);
clear_inode_flag(dn->inode, FI_INLINE_DATA);
f2fs_put_dnode(dn);
return 0;
}
@@ -215,6 +220,8 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
void *src_addr, *dst_addr;
struct dnode_of_data dn;
struct address_space *mapping = page_mapping(page);
unsigned long flags;
int err;
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -231,11 +238,16 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
src_addr = kmap_atomic(page);
dst_addr = inline_data_addr(dn.inode_page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
dst_addr = inline_data_addr(inode, dn.inode_page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page);
spin_lock_irqsave(&mapping->tree_lock, flags);
radix_tree_tag_clear(&mapping->page_tree, page_index(page),
PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
@@ -270,9 +282,9 @@ process_inline:
f2fs_wait_on_page_writeback(ipage, NODE, true);
src_addr = inline_data_addr(npage);
dst_addr = inline_data_addr(ipage);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
src_addr = inline_data_addr(inode, npage);
dst_addr = inline_data_addr(inode, ipage);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode));
set_inode_flag(inode, FI_INLINE_DATA);
set_inode_flag(inode, FI_DATA_EXIST);
@@ -285,9 +297,8 @@ process_inline:
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
if (!truncate_inline_inode(ipage, 0))
return false;
f2fs_clear_inline_inode(inode);
truncate_inline_inode(inode, ipage, 0);
clear_inode_flag(inode, FI_INLINE_DATA);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
if (truncate_blocks(inode, 0, false))
@@ -301,11 +312,11 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct f2fs_inline_dentry *inline_dentry;
struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
struct page *ipage;
void *inline_dentry;
f2fs_hash_t namehash;
ipage = get_node_page(sbi, dir->i_ino);
@@ -314,11 +325,11 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
return NULL;
}
namehash = f2fs_dentry_hash(&name);
namehash = f2fs_dentry_hash(&name, fname);
inline_dentry = inline_data_addr(ipage);
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
make_dentry_ptr_inline(dir, &d, inline_dentry);
de = find_target_dentry(fname, namehash, NULL, &d);
unlock_page(ipage);
if (de)
@@ -332,19 +343,19 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
int make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage)
{
struct f2fs_inline_dentry *dentry_blk;
struct f2fs_dentry_ptr d;
void *inline_dentry;
dentry_blk = inline_data_addr(ipage);
inline_dentry = inline_data_addr(inode, ipage);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
make_dentry_ptr_inline(inode, &d, inline_dentry);
do_make_empty_dir(inode, parent, &d);
set_page_dirty(ipage);
/* update i_size to MAX_INLINE_DATA */
if (i_size_read(inode) < MAX_INLINE_DATA)
f2fs_i_size_write(inode, MAX_INLINE_DATA);
if (i_size_read(inode) < MAX_INLINE_DATA(inode))
f2fs_i_size_write(inode, MAX_INLINE_DATA(inode));
return 0;
}
@@ -353,11 +364,12 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
* release ipage in this function.
*/
static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
void *inline_dentry)
{
struct page *page;
struct dnode_of_data dn;
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dentry_ptr src, dst;
int err;
page = f2fs_grab_cache_page(dir->i_mapping, 0, false);
@@ -372,25 +384,24 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
goto out;
f2fs_wait_on_page_writeback(page, DATA, true);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE);
dentry_blk = kmap_atomic(page);
make_dentry_ptr_inline(dir, &src, inline_dentry);
make_dentry_ptr_block(dir, &dst, dentry_blk);
/* copy data from inline dentry block to new dentry block */
memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap,
INLINE_DENTRY_BITMAP_SIZE);
memset(dentry_blk->dentry_bitmap + INLINE_DENTRY_BITMAP_SIZE, 0,
SIZE_OF_DENTRY_BITMAP - INLINE_DENTRY_BITMAP_SIZE);
memcpy(dst.bitmap, src.bitmap, src.nr_bitmap);
memset(dst.bitmap + src.nr_bitmap, 0, dst.nr_bitmap - src.nr_bitmap);
/*
* we do not need to zero out remainder part of dentry and filename
* field, since we have used bitmap for marking the usage status of
* them, besides, we can also ignore copying/zeroing reserved space
* of dentry block, because them haven't been used so far.
*/
memcpy(dentry_blk->dentry, inline_dentry->dentry,
sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY);
memcpy(dentry_blk->filename, inline_dentry->filename,
NR_INLINE_DENTRY * F2FS_SLOT_LEN);
memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max);
memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN);
kunmap_atomic(dentry_blk);
if (!PageUptodate(page))
@@ -398,7 +409,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
set_page_dirty(page);
/* clear inline dir and flag after data writeback */
truncate_inline_inode(ipage, 0);
truncate_inline_inode(dir, ipage, 0);
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
@@ -411,14 +422,13 @@ out:
return err;
}
static int f2fs_add_inline_entries(struct inode *dir,
struct f2fs_inline_dentry *inline_dentry)
static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
{
struct f2fs_dentry_ptr d;
unsigned long bit_pos = 0;
int err = 0;
make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
make_dentry_ptr_inline(dir, &d, inline_dentry);
while (bit_pos < d.max) {
struct f2fs_dir_entry *de;
@@ -460,20 +470,20 @@ punch_dentry_pages:
}
static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
void *inline_dentry)
{
struct f2fs_inline_dentry *backup_dentry;
void *backup_dentry;
int err;
backup_dentry = f2fs_kmalloc(F2FS_I_SB(dir),
sizeof(struct f2fs_inline_dentry), GFP_F2FS_ZERO);
MAX_INLINE_DATA(dir), GFP_F2FS_ZERO);
if (!backup_dentry) {
f2fs_put_page(ipage, 1);
return -ENOMEM;
}
memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA);
truncate_inline_inode(ipage, 0);
memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir));
truncate_inline_inode(dir, ipage, 0);
unlock_page(ipage);
@@ -489,9 +499,9 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
return 0;
recover:
lock_page(ipage);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
@@ -500,7 +510,7 @@ recover:
}
static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
void *inline_dentry)
{
if (!F2FS_I(dir)->i_dir_level)
return f2fs_move_inline_dirents(dir, ipage, inline_dentry);
@@ -516,7 +526,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
struct page *ipage;
unsigned int bit_pos;
f2fs_hash_t name_hash;
struct f2fs_inline_dentry *dentry_blk = NULL;
void *inline_dentry = NULL;
struct f2fs_dentry_ptr d;
int slots = GET_DENTRY_SLOTS(new_name->len);
struct page *page = NULL;
@@ -526,11 +536,12 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
if (IS_ERR(ipage))
return PTR_ERR(ipage);
dentry_blk = inline_data_addr(ipage);
bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
slots, NR_INLINE_DENTRY);
if (bit_pos >= NR_INLINE_DENTRY) {
err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
bit_pos = room_for_filename(d.bitmap, slots, d.max);
if (bit_pos >= d.max) {
err = f2fs_convert_inline_dir(dir, ipage, inline_dentry);
if (err)
return err;
err = -EAGAIN;
@@ -545,14 +556,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
err = PTR_ERR(page);
goto fail;
}
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
f2fs_wait_on_page_writeback(ipage, NODE, true);
name_hash = f2fs_dentry_hash(new_name);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
name_hash = f2fs_dentry_hash(new_name, NULL);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
set_page_dirty(ipage);
@@ -575,7 +583,8 @@ out:
void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct inode *dir, struct inode *inode)
{
struct f2fs_inline_dentry *inline_dentry;
struct f2fs_dentry_ptr d;
void *inline_dentry;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
unsigned int bit_pos;
int i;
@@ -583,11 +592,12 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
lock_page(page);
f2fs_wait_on_page_writeback(page, NODE, true);
inline_dentry = inline_data_addr(page);
bit_pos = dentry - inline_dentry->dentry;
inline_dentry = inline_data_addr(dir, page);
make_dentry_ptr_inline(dir, &d, inline_dentry);
bit_pos = dentry - d.dentry;
for (i = 0; i < slots; i++)
__clear_bit_le(bit_pos + i,
&inline_dentry->dentry_bitmap);
__clear_bit_le(bit_pos + i, d.bitmap);
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -604,20 +614,21 @@ bool f2fs_empty_inline_dir(struct inode *dir)
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos = 2;
struct f2fs_inline_dentry *dentry_blk;
void *inline_dentry;
struct f2fs_dentry_ptr d;
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return false;
dentry_blk = inline_data_addr(ipage);
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_INLINE_DENTRY,
bit_pos);
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
bit_pos = find_next_bit_le(d.bitmap, d.max, bit_pos);
f2fs_put_page(ipage, 1);
if (bit_pos < NR_INLINE_DENTRY)
if (bit_pos < d.max)
return false;
return true;
@@ -627,25 +638,27 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
struct fscrypt_str *fstr)
{
struct inode *inode = file_inode(file);
struct f2fs_inline_dentry *inline_dentry = NULL;
struct page *ipage = NULL;
struct f2fs_dentry_ptr d;
void *inline_dentry = NULL;
int err;
if (ctx->pos == NR_INLINE_DENTRY)
make_dentry_ptr_inline(inode, &d, inline_dentry);
if (ctx->pos == d.max)
return 0;
ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
inline_dentry = inline_data_addr(ipage);
inline_dentry = inline_data_addr(inode, ipage);
make_dentry_ptr(inode, &d, (void *)inline_dentry, 2);
make_dentry_ptr_inline(inode, &d, inline_dentry);
err = f2fs_fill_dentries(ctx, &d, 0, fstr);
if (!err)
ctx->pos = NR_INLINE_DENTRY;
ctx->pos = d.max;
f2fs_put_page(ipage, 1);
return err < 0 ? err : 0;
@@ -670,7 +683,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
goto out;
}
ilen = min_t(size_t, MAX_INLINE_DATA, i_size_read(inode));
ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode));
if (start >= ilen)
goto out;
if (start + len < ilen)
@@ -679,7 +692,8 @@ int f2fs_inline_data_fiemap(struct inode *inode,
get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(ipage) - (char *)F2FS_INODE(ipage);
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
out:
f2fs_put_page(ipage, 1);

View File

@@ -16,6 +16,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>
@@ -44,25 +45,26 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
f2fs_mark_inode_dirty_sync(inode, false);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
{
int extra_size = get_extra_isize(inode);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
if (ri->i_addr[0])
inode->i_rdev =
old_decode_dev(le32_to_cpu(ri->i_addr[0]));
if (ri->i_addr[extra_size])
inode->i_rdev = old_decode_dev(
le32_to_cpu(ri->i_addr[extra_size]));
else
inode->i_rdev =
new_decode_dev(le32_to_cpu(ri->i_addr[1]));
inode->i_rdev = new_decode_dev(
le32_to_cpu(ri->i_addr[extra_size + 1]));
}
}
static bool __written_first_block(struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[0]);
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
if (addr != NEW_ADDR && addr != NULL_ADDR)
return true;
@@ -71,25 +73,27 @@ static bool __written_first_block(struct f2fs_inode *ri)
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
{
int extra_size = get_extra_isize(inode);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
if (old_valid_dev(inode->i_rdev)) {
ri->i_addr[0] =
ri->i_addr[extra_size] =
cpu_to_le32(old_encode_dev(inode->i_rdev));
ri->i_addr[1] = 0;
ri->i_addr[extra_size + 1] = 0;
} else {
ri->i_addr[0] = 0;
ri->i_addr[1] =
ri->i_addr[extra_size] = 0;
ri->i_addr[extra_size + 1] =
cpu_to_le32(new_encode_dev(inode->i_rdev));
ri->i_addr[2] = 0;
ri->i_addr[extra_size + 2] = 0;
}
}
}
static void __recover_inline_status(struct inode *inode, struct page *ipage)
{
void *inline_data = inline_data_addr(ipage);
void *inline_data = inline_data_addr(inode, ipage);
__le32 *start = inline_data;
__le32 *end = start + MAX_INLINE_DATA / sizeof(__le32);
__le32 *end = start + MAX_INLINE_DATA(inode) / sizeof(__le32);
while (start < end) {
if (*start++) {
@@ -104,12 +108,84 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
return;
}
static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_inode *ri = &F2FS_NODE(page)->i;
int extra_isize = le32_to_cpu(ri->i_extra_isize);
if (!f2fs_sb_has_inode_chksum(sbi->sb))
return false;
if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
if (!F2FS_FITS_IN_INODE(ri, extra_isize, i_inode_checksum))
return false;
return true;
}
static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_node *node = F2FS_NODE(page);
struct f2fs_inode *ri = &node->i;
__le32 ino = node->footer.ino;
__le32 gen = ri->i_generation;
__u32 chksum, chksum_seed;
__u32 dummy_cs = 0;
unsigned int offset = offsetof(struct f2fs_inode, i_inode_checksum);
unsigned int cs_size = sizeof(dummy_cs);
chksum = f2fs_chksum(sbi, sbi->s_chksum_seed, (__u8 *)&ino,
sizeof(ino));
chksum_seed = f2fs_chksum(sbi, chksum, (__u8 *)&gen, sizeof(gen));
chksum = f2fs_chksum(sbi, chksum_seed, (__u8 *)ri, offset);
chksum = f2fs_chksum(sbi, chksum, (__u8 *)&dummy_cs, cs_size);
offset += cs_size;
chksum = f2fs_chksum(sbi, chksum, (__u8 *)ri + offset,
F2FS_BLKSIZE - offset);
return chksum;
}
bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_inode *ri;
__u32 provided, calculated;
if (!f2fs_enable_inode_chksum(sbi, page) ||
PageDirty(page) || PageWriteback(page))
return true;
ri = &F2FS_NODE(page)->i;
provided = le32_to_cpu(ri->i_inode_checksum);
calculated = f2fs_inode_chksum(sbi, page);
if (provided != calculated)
f2fs_msg(sbi->sb, KERN_WARNING,
"checksum invalid, ino = %x, %x vs. %x",
ino_of_node(page), provided, calculated);
return provided == calculated;
}
void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_inode *ri = &F2FS_NODE(page)->i;
if (!f2fs_enable_inode_chksum(sbi, page))
return;
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
struct f2fs_inode *ri;
projid_t i_projid;
/* Check if ino is within scope */
if (check_nid_range(sbi, inode->i_ino)) {
@@ -130,7 +206,7 @@ static int do_read_inode(struct inode *inode)
i_gid_write(inode, le32_to_cpu(ri->i_gid));
set_nlink(inode, le32_to_cpu(ri->i_links));
inode->i_size = le64_to_cpu(ri->i_size);
inode->i_blocks = le64_to_cpu(ri->i_blocks);
inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1);
inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
@@ -153,6 +229,9 @@ static int do_read_inode(struct inode *inode)
get_inline_info(inode, ri);
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
le16_to_cpu(ri->i_extra_isize) : 0;
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -166,6 +245,16 @@ static int do_read_inode(struct inode *inode)
if (!need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
if (fi->i_flags & FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
i_projid = (projid_t)le32_to_cpu(ri->i_projid);
else
i_projid = F2FS_DEF_PROJID;
fi->i_projid = make_kprojid(&init_user_ns, i_projid);
f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
@@ -226,6 +315,7 @@ make_now:
ret = -EIO;
goto bad_inode;
}
f2fs_set_inode_flags(inode);
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
@@ -267,7 +357,7 @@ int update_inode(struct inode *inode, struct page *node_page)
ri->i_gid = cpu_to_le32(i_gid_read(inode));
ri->i_links = cpu_to_le32(inode->i_nlink);
ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(inode->i_blocks);
ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
if (et) {
read_lock(&et->lock);
@@ -291,6 +381,20 @@ int update_inode(struct inode *inode, struct page *node_page)
ri->i_generation = cpu_to_le32(inode->i_generation);
ri->i_dir_level = F2FS_I(inode)->i_dir_level;
if (f2fs_has_extra_attr(inode)) {
ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_projid)) {
projid_t i_projid;
i_projid = from_kprojid(&init_user_ns,
F2FS_I(inode)->i_projid);
ri->i_projid = cpu_to_le32(i_projid);
}
}
__set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
@@ -316,7 +420,6 @@ retry:
} else if (err != -ENOENT) {
f2fs_stop_checkpoint(sbi, false);
}
f2fs_inode_synced(inode);
return 0;
}
ret = update_inode(inode, node_page);
@@ -339,7 +442,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
if (update_inode_page(inode) && wbc && wbc->nr_to_write)
update_inode_page(inode);
if (wbc && wbc->nr_to_write)
f2fs_balance_fs(sbi, true);
return 0;
}
@@ -372,10 +476,7 @@ void f2fs_evict_inode(struct inode *inode)
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE))
goto no_delete;
#endif
dquot_initialize(inode);
remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
@@ -387,6 +488,12 @@ retry:
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
#endif
if (!err) {
f2fs_lock_op(sbi);
err = remove_inode_page(inode);
@@ -403,13 +510,22 @@ retry:
if (err)
update_inode_page(inode);
dquot_free_inode(inode);
sb_end_intwrite(inode->i_sb);
no_delete:
dquot_drop(inode);
stat_dec_inline_xattr(inode);
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
/* ino == 0, if f2fs_new_inode() was failed t*/
if (inode->i_ino)
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino,
inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (inode->i_nlink) {
@@ -421,9 +537,10 @@ no_delete:
if (is_inode_flag_set(inode, FI_FREE_NID)) {
alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(inode, FI_FREE_NID);
} else {
f2fs_bug_on(sbi, err &&
!exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
}
f2fs_bug_on(sbi, err &&
!exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
out_clear:
fscrypt_put_encryption_info(inode, NULL);
clear_inode(inode);
@@ -446,6 +563,7 @@ void handle_failed_inode(struct inode *inode)
* in a panic when flushing dirty inodes in gdirty_list.
*/
update_inode_page(inode);
f2fs_inode_synced(inode);
/* don't make bad inode, since it becomes a regular file. */
unlock_new_inode(inode);

View File

@@ -15,6 +15,7 @@
#include <linux/ctype.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include "f2fs.h"
#include "node.h"
@@ -42,6 +43,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
}
f2fs_unlock_op(sbi);
nid_free = true;
inode_init_owner(inode, dir, mode);
inode->i_ino = ino;
@@ -52,16 +55,35 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
err = insert_inode_locked(inode);
if (err) {
err = -EINVAL;
nid_free = true;
goto fail;
}
if (f2fs_sb_has_project_quota(sbi->sb) &&
(F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
F2FS_DEF_PROJID);
err = dquot_initialize(inode);
if (err)
goto fail_drop;
err = dquot_alloc_inode(inode);
if (err)
goto fail_drop;
/* If the directory encrypted, then we should encrypt the inode. */
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
set_inode_flag(inode, FI_NEW_INODE);
if (f2fs_sb_has_extra_attr(sbi->sb)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
if (test_opt(sbi, INLINE_XATTR))
set_inode_flag(inode, FI_INLINE_XATTR);
if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
@@ -75,6 +97,15 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
F2FS_I(inode)->i_flags =
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_flags |= FS_INDEX_FL;
if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
trace_f2fs_new_inode(inode, 0);
return inode;
@@ -85,6 +116,16 @@ fail:
set_inode_flag(inode, FI_FREE_NID);
iput(inode);
return ERR_PTR(err);
fail_drop:
trace_f2fs_new_inode(inode, err);
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
if (nid_free)
set_inode_flag(inode, FI_FREE_NID);
clear_nlink(inode);
unlock_new_inode(inode);
iput(inode);
return ERR_PTR(err);
}
static int is_multimedia_file(const unsigned char *s, const char *sub)
@@ -136,6 +177,10 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
nid_t ino = 0;
int err;
err = dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -148,8 +193,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@@ -163,6 +206,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
out:
handle_failed_inode(inode);
@@ -180,6 +225,15 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
!fscrypt_has_permitted_context(dir, inode))
return -EPERM;
if (is_inode_flag_set(dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(dir)->i_projid,
F2FS_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(dir);
if (err)
return err;
f2fs_balance_fs(sbi, true);
inode->i_ctime = current_time(inode);
@@ -233,6 +287,10 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
return 0;
}
err = dquot_initialize(dir);
if (err)
return err;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
@@ -324,9 +382,10 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (f2fs_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
bool nokey = f2fs_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode);
err = nokey ? -ENOKEY : -EPERM;
f2fs_msg(inode->i_sb, KERN_WARNING,
"Inconsistent encryption contexts: %lu/%lu",
dir->i_ino, inode->i_ino);
err = -EPERM;
goto err_out;
}
return d_splice_alias(inode, dentry);
@@ -346,6 +405,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
trace_f2fs_unlink_enter(dir, dentry);
err = dquot_initialize(dir);
if (err)
return err;
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de) {
if (IS_ERR(page))
@@ -400,7 +463,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
return err;
if (!fscrypt_has_encryption_key(dir))
return -EPERM;
return -ENOKEY;
disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
sizeof(struct fscrypt_symlink_data));
@@ -409,6 +472,10 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
if (disk_link.len > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
err = dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -420,8 +487,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@@ -444,7 +509,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
goto err_out;
if (!fscrypt_has_encryption_key(inode)) {
err = -EPERM;
err = -ENOKEY;
goto err_out;
}
@@ -484,6 +549,8 @@ err_out:
}
kfree(sd);
f2fs_balance_fs(sbi, true);
return err;
out:
handle_failed_inode(inode);
@@ -496,6 +563,10 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int err;
err = dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, S_IFDIR | mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -505,8 +576,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_mapping->a_ops = &f2fs_dblock_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
f2fs_balance_fs(sbi, true);
set_inode_flag(inode, FI_INC_LINK);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
@@ -521,6 +590,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
out_fail:
@@ -544,6 +615,10 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err = 0;
err = dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -551,8 +626,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@@ -566,6 +639,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
out:
handle_failed_inode(inode);
@@ -579,6 +654,10 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err;
err = dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -592,8 +671,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err)
@@ -619,6 +696,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
/* link_count was changed by d_tmpfile as well. */
f2fs_unlock_op(sbi);
unlock_new_inode(inode);
f2fs_balance_fs(sbi, true);
return 0;
release_out:
@@ -672,6 +751,19 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
}
if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(new_dir)->i_projid,
F2FS_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(old_dir);
if (err)
goto out;
err = dquot_initialize(new_dir);
if (err)
goto out;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
@@ -717,13 +809,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto put_out_dir;
err = update_dent_inode(old_inode, new_inode,
&new_dentry->d_name);
if (err) {
release_orphan_inode(sbi);
goto put_out_dir;
}
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
new_inode->i_ctime = current_time(new_inode);
@@ -775,9 +860,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
down_write(&F2FS_I(old_inode)->i_sem);
file_lost_pino(old_inode);
if (new_inode && file_enc_name(new_inode))
file_set_enc_name(old_inode);
if (!old_dir_entry || whiteout)
file_lost_pino(old_inode);
else
F2FS_I(old_inode)->i_pino = new_dir->i_ino;
up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = current_time(old_inode);
@@ -858,6 +944,22 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
!fscrypt_has_permitted_context(old_dir, new_inode)))
return -EPERM;
if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(new_dir)->i_projid,
F2FS_I(old_dentry->d_inode)->i_projid)) ||
(is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(old_dir)->i_projid,
F2FS_I(new_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(old_dir);
if (err)
goto out;
err = dquot_initialize(new_dir);
if (err)
goto out;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
@@ -905,8 +1007,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
old_nlink = old_dir_entry ? -1 : 1;
new_nlink = -old_nlink;
err = -EMLINK;
if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
(new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
if ((old_nlink > 0 && old_dir->i_nlink >= F2FS_LINK_MAX) ||
(new_nlink > 0 && new_dir->i_nlink >= F2FS_LINK_MAX))
goto out_new_dir;
}
@@ -914,18 +1016,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_lock_op(sbi);
err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
if (err)
goto out_unlock;
if (file_enc_name(new_inode))
file_set_enc_name(old_inode);
err = update_dent_inode(new_inode, old_inode, &old_dentry->d_name);
if (err)
goto out_undo;
if (file_enc_name(old_inode))
file_set_enc_name(new_inode);
/* update ".." directory entry info of old dentry */
if (old_dir_entry)
f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
@@ -941,7 +1031,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
file_lost_pino(old_inode);
up_write(&F2FS_I(old_inode)->i_sem);
old_dir->i_ctime = CURRENT_TIME;
old_dir->i_ctime = current_time(old_dir);
if (old_nlink) {
down_write(&F2FS_I(old_dir)->i_sem);
f2fs_i_links_write(old_dir, old_nlink > 0);
@@ -956,7 +1046,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
file_lost_pino(new_inode);
up_write(&F2FS_I(new_inode)->i_sem);
new_dir->i_ctime = CURRENT_TIME;
new_dir->i_ctime = current_time(new_dir);
if (new_nlink) {
down_write(&F2FS_I(new_dir)->i_sem);
f2fs_i_links_write(new_dir, new_nlink > 0);
@@ -969,14 +1059,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
return 0;
out_undo:
/*
* Still we may fail to recover name info of f2fs_inode here
* Drop it, once its name is set as encrypted
*/
update_dent_inode(old_inode, old_inode, &old_dentry->d_name);
out_unlock:
f2fs_unlock_op(sbi);
out_new_dir:
if (new_dir_entry) {
f2fs_dentry_kunmap(new_inode, new_dir_page);

File diff suppressed because it is too large Load Diff

View File

@@ -9,10 +9,10 @@
* published by the Free Software Foundation.
*/
/* start node id of a node block dedicated to the given node id */
#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
/* node block offset on the NAT area dedicated to the given start node id */
#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK)
#define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK)
/* # of pages to perform synchronous readahead before building free nids */
#define FREE_NID_PAGES 8
@@ -62,16 +62,16 @@ struct nat_entry {
struct node_info ni; /* in-memory node information */
};
#define nat_get_nid(nat) (nat->ni.nid)
#define nat_set_nid(nat, n) (nat->ni.nid = n)
#define nat_get_blkaddr(nat) (nat->ni.blk_addr)
#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b)
#define nat_get_ino(nat) (nat->ni.ino)
#define nat_set_ino(nat, i) (nat->ni.ino = i)
#define nat_get_version(nat) (nat->ni.version)
#define nat_set_version(nat, v) (nat->ni.version = v)
#define nat_get_nid(nat) ((nat)->ni.nid)
#define nat_set_nid(nat, n) ((nat)->ni.nid = (n))
#define nat_get_blkaddr(nat) ((nat)->ni.blk_addr)
#define nat_set_blkaddr(nat, b) ((nat)->ni.blk_addr = (b))
#define nat_get_ino(nat) ((nat)->ni.ino)
#define nat_set_ino(nat, i) ((nat)->ni.ino = (i))
#define nat_get_version(nat) ((nat)->ni.version)
#define nat_set_version(nat, v) ((nat)->ni.version = (v))
#define inc_node_version(version) (++version)
#define inc_node_version(version) (++(version))
static inline void copy_node_info(struct node_info *dst,
struct node_info *src)
@@ -200,21 +200,18 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
struct f2fs_nm_info *nm_i = NM_I(sbi);
pgoff_t block_off;
pgoff_t block_addr;
int seg_off;
/*
* block_off = segment_off * 512 + off_in_segment
* OLD = (segment_off * 512) * 2 + off_in_segment
* NEW = 2 * (segment_off * 512 + off_in_segment) - off_in_segment
*/
block_off = NAT_BLOCK_OFFSET(start);
seg_off = block_off >> sbi->log_blocks_per_seg;
block_addr = (pgoff_t)(nm_i->nat_blkaddr +
(seg_off << sbi->log_blocks_per_seg << 1) +
(block_off << 1) -
(block_off & (sbi->blocks_per_seg - 1)));
#ifdef CONFIG_F2FS_CHECK_FS
if (f2fs_test_bit(block_off, nm_i->nat_bitmap) !=
f2fs_test_bit(block_off, nm_i->nat_bitmap_mir))
f2fs_bug_on(sbi, 1);
#endif
if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
block_addr += sbi->blocks_per_seg;
@@ -227,11 +224,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
struct f2fs_nm_info *nm_i = NM_I(sbi);
block_addr -= nm_i->nat_blkaddr;
if ((block_addr >> sbi->log_blocks_per_seg) % 2)
block_addr -= sbi->blocks_per_seg;
else
block_addr += sbi->blocks_per_seg;
block_addr ^= 1 << sbi->log_blocks_per_seg;
return block_addr + nm_i->nat_blkaddr;
}
@@ -306,14 +299,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
struct f2fs_node *rn = F2FS_NODE(page);
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
__u64 cp_ver = cur_cp_version(ckpt);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
cp_ver |= (cur_cp_crc(ckpt) << 32);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
__u64 crc = le32_to_cpu(*((__le32 *)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
rn->footer.cp_ver = cpu_to_le64(cp_ver);
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
@@ -321,14 +311,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
static inline bool is_recoverable_dnode(struct page *page)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = cur_cp_version(ckpt);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
__u64 crc = le32_to_cpu(*((__le32 *)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
cp_ver |= (cur_cp_crc(ckpt) << 32);
return cp_ver == cpver_of_node(page);
}
@@ -358,7 +345,7 @@ static inline bool IS_DNODE(struct page *node_page)
unsigned int ofs = ofs_of_node(node_page);
if (f2fs_has_xattr_block(ofs))
return false;
return true;
if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
ofs == 5 + 2 * NIDS_PER_BLOCK)

View File

@@ -69,20 +69,34 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
}
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
struct list_head *head, nid_t ino)
struct list_head *head, nid_t ino, bool quota_inode)
{
struct inode *inode;
struct fsync_inode_entry *entry;
int err;
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
err = dquot_initialize(inode);
if (err)
goto err_out;
if (quota_inode) {
err = dquot_alloc_inode(inode);
if (err)
goto err_out;
}
entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
entry->inode = inode;
list_add_tail(&entry->list, head);
return entry;
err_out:
iput(inode);
return ERR_PTR(err);
}
static void del_fsync_inode(struct fsync_inode_entry *entry)
@@ -107,7 +121,8 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
entry = get_fsync_inode(dir_list, pino);
if (!entry) {
entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
entry = add_fsync_inode(F2FS_I_SB(inode), dir_list,
pino, false);
if (IS_ERR(entry)) {
dir = ERR_CAST(entry);
err = PTR_ERR(entry);
@@ -140,6 +155,13 @@ retry:
err = -EEXIST;
goto out_unmap_put;
}
err = dquot_initialize(einode);
if (err) {
iput(einode);
goto out_unmap_put;
}
err = acquire_orphan_inode(F2FS_I_SB(inode));
if (err) {
iput(einode);
@@ -198,7 +220,8 @@ static void recover_inode(struct inode *inode, struct page *page)
ino_of_node(page), name);
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
bool check_only)
{
struct curseg_info *curseg;
struct page *page = NULL;
@@ -225,17 +248,22 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
entry = get_fsync_inode(head, ino_of_node(page));
if (!entry) {
if (IS_INODE(page) && is_dent_dnode(page)) {
bool quota_inode = false;
if (!check_only &&
IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
break;
quota_inode = true;
}
/*
* CP | dnode(F) | inode(DF)
* For this case, we should not give up now.
*/
entry = add_fsync_inode(sbi, head, ino_of_node(page));
entry = add_fsync_inode(sbi, head, ino_of_node(page),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
if (err == -ENOENT) {
@@ -326,10 +354,18 @@ got_it:
f2fs_put_page(node_page, 1);
if (ino != dn->inode->i_ino) {
int ret;
/* Deallocate previous index in the node page */
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode))
return PTR_ERR(inode);
ret = dquot_initialize(inode);
if (ret) {
iput(inode);
return ret;
}
} else {
inode = dn->inode;
}
@@ -359,7 +395,8 @@ out:
return 0;
truncate_out:
if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
if (datablock_addr(tdn.inode, tdn.node_page,
tdn.ofs_in_node) == blkaddr)
truncate_data_blocks_range(&tdn, 1);
if (dn->inode->i_ino == nid && !dn->inode_page_locked)
unlock_page(dn->inode_page);
@@ -378,11 +415,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (IS_INODE(page)) {
recover_inline_xattr(inode, page);
} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
/*
* Deprecated; xattr blocks should be found from cold log.
* But, we should remain this for backward compatibility.
*/
recover_xattr_data(inode, page, blkaddr);
err = recover_xattr_data(inode, page, blkaddr);
if (!err)
recovered++;
goto out;
}
@@ -414,8 +449,8 @@ retry_dn:
for (; start < end; start++, dn.ofs_in_node++) {
block_t src, dest;
src = datablock_addr(dn.node_page, dn.ofs_in_node);
dest = datablock_addr(page, dn.ofs_in_node);
src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
/* skip recovering if dest is the same as src */
if (src == dest)
@@ -428,8 +463,9 @@ retry_dn:
}
if (!file_keep_isize(inode) &&
(i_size_read(inode) <= (start << PAGE_SHIFT)))
f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
(i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
f2fs_i_size_write(inode,
(loff_t)(start + 1) << PAGE_SHIFT);
/*
* dest is reserved block, invalidate src block
@@ -556,12 +592,27 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
struct list_head dir_list;
int err;
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
if (s_flags & MS_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~MS_RDONLY;
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
/* Turn on quotas so that they are updated correctly */
f2fs_enable_quota_files(sbi);
#endif
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
sizeof(struct fsync_inode_entry));
if (!fsync_entry_slab)
return -ENOMEM;
if (!fsync_entry_slab) {
err = -ENOMEM;
goto out;
}
INIT_LIST_HEAD(&inode_list);
INIT_LIST_HEAD(&dir_list);
@@ -570,13 +621,13 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
mutex_lock(&sbi->cp_mutex);
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list);
err = find_fsync_dnodes(sbi, &inode_list, check_only);
if (err || list_empty(&inode_list))
goto out;
goto skip;
if (check_only) {
ret = 1;
goto out;
goto skip;
}
need_writecp = true;
@@ -585,7 +636,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
err = recover_data(sbi, &inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
skip:
destroy_fsync_dnodes(&inode_list);
/* truncate meta pages to be used by the recovery */
@@ -598,8 +649,6 @@ out:
}
clear_sbi_flag(sbi, SBI_POR_DOING);
if (err)
set_ckpt_flags(sbi, CP_ERROR_FLAG);
mutex_unlock(&sbi->cp_mutex);
/* let's drop all the directory inodes for clean checkpoint */
@@ -613,5 +662,12 @@ out:
}
kmem_cache_destroy(fsync_entry_slab);
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
f2fs_quota_off_umount(sbi->sb);
#endif
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
return ret ? ret: err;
}

File diff suppressed because it is too large Load Diff

View File

@@ -21,78 +21,88 @@
#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno)
#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno)
#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE)
#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
#define IS_COLD(t) ((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA)
#define IS_CURSEG(sbi, seg) \
((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
(((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
#define IS_CURSEC(sbi, secno) \
((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
sbi->segs_per_sec)) \
(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
(sbi)->segs_per_sec)) \
#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
#define MAIN_SECS(sbi) (sbi->total_sections)
#define MAIN_SECS(sbi) ((sbi)->total_sections)
#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg)
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \
sbi->log_blocks_per_seg))
#define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \
(sbi)->log_blocks_per_seg))
#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \
(GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg))
(GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg))
#define NEXT_FREE_BLKADDR(sbi, curseg) \
(START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)
(START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff)
#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi))
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg)
#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
(((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define GET_SECNO(sbi, segno) \
((segno) / sbi->segs_per_sec)
#define GET_ZONENO_FROM_SEGNO(sbi, segno) \
((segno / sbi->segs_per_sec) / sbi->secs_per_zone)
#define BLKS_PER_SEC(sbi) \
((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
#define GET_SEC_FROM_SEG(sbi, segno) \
((segno) / (sbi)->segs_per_sec)
#define GET_SEG_FROM_SEC(sbi, secno) \
((secno) * (sbi)->segs_per_sec)
#define GET_ZONE_FROM_SEC(sbi, secno) \
((secno) / (sbi)->secs_per_zone)
#define GET_ZONE_FROM_SEG(sbi, segno) \
GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno))
#define GET_SUM_BLOCK(sbi, segno) \
((sbi->sm_info->ssa_blkaddr) + segno)
((sbi)->sm_info->ssa_blkaddr + (segno))
#define GET_SUM_TYPE(footer) ((footer)->entry_type)
#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type)
#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type))
#define SIT_ENTRY_OFFSET(sit_i, segno) \
(segno % sit_i->sents_per_block)
((segno) % (sit_i)->sents_per_block)
#define SIT_BLOCK_OFFSET(segno) \
(segno / SIT_ENTRY_PER_BLOCK)
((segno) / SIT_ENTRY_PER_BLOCK)
#define START_SEGNO(segno) \
(SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
#define SIT_BLK_CNT(sbi) \
@@ -103,7 +113,7 @@
#define SECTOR_FROM_BLOCK(blk_addr) \
(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
#define SECTOR_TO_BLOCK(sectors) \
(sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK)
/*
* indicate a block allocation direction: RIGHT and LEFT.
@@ -132,7 +142,10 @@ enum {
*/
enum {
GC_CB = 0,
GC_GREEDY
GC_GREEDY,
ALLOC_NEXT,
FLUSH_DEVICE,
MAX_GC_POLICY,
};
/*
@@ -227,6 +240,8 @@ struct sit_info {
unsigned long long mounted_time; /* mount time */
unsigned long long min_mtime; /* min. modification time */
unsigned long long max_mtime; /* max. modification time */
unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
};
struct free_segmap_info {
@@ -303,17 +318,17 @@ static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct sit_info *sit_i = SIT_I(sbi);
return &sit_i->sec_entries[GET_SECNO(sbi, segno)];
return &sit_i->sec_entries[GET_SEC_FROM_SEG(sbi, segno)];
}
static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
unsigned int segno, int section)
unsigned int segno, bool use_section)
{
/*
* In order to get # of valid blocks in a section instantly from many
* segments, f2fs manages two counting structures separately.
*/
if (section > 1)
if (use_section && sbi->segs_per_sec > 1)
return get_sec_entry(sbi, segno)->valid_blocks;
else
return get_seg_entry(sbi, segno)->valid_blocks;
@@ -358,8 +373,8 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int start_segno = secno * sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
spin_lock(&free_i->segmap_lock);
@@ -379,7 +394,8 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
set_bit(segno, free_i->free_segmap);
free_i->free_segments--;
if (!test_and_set_bit(secno, free_i->free_secmap))
@@ -390,8 +406,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int start_segno = secno * sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
spin_lock(&free_i->segmap_lock);
@@ -412,7 +428,8 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
spin_lock(&free_i->segmap_lock);
if (!test_and_set_bit(segno, free_i->free_segmap)) {
free_i->free_segments--;
@@ -475,27 +492,9 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi)
return SM_I(sbi)->ovp_segments;
}
static inline int overprovision_sections(struct f2fs_sb_info *sbi)
{
return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec;
}
static inline int reserved_sections(struct f2fs_sb_info *sbi)
{
return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec;
}
static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
if (test_opt(sbi, LFS))
return false;
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
reserved_sections(sbi) + 1);
return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
}
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
@@ -540,6 +539,7 @@ static inline int utilization(struct f2fs_sb_info *sbi)
*/
#define DEF_MIN_IPU_UTIL 70
#define DEF_MIN_FSYNC_BLOCKS 8
#define DEF_MIN_HOT_BLOCKS 16
enum {
F2FS_IPU_FORCE,
@@ -547,20 +547,22 @@ enum {
F2FS_IPU_UTIL,
F2FS_IPU_SSR_UTIL,
F2FS_IPU_FSYNC,
F2FS_IPU_ASYNC,
};
static inline bool need_inplace_update(struct inode *inode)
static inline bool need_inplace_update_policy(struct inode *inode,
struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int policy = SM_I(sbi)->ipu_policy;
/* IPU can be done only for the user data */
if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
return false;
if (test_opt(sbi, LFS))
return false;
/* if this is cold file, we should overwrite to avoid fragmentation */
if (file_is_cold(inode))
return true;
if (policy & (0x1 << F2FS_IPU_FORCE))
return true;
if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
@@ -572,6 +574,15 @@ static inline bool need_inplace_update(struct inode *inode)
utilization(sbi) > SM_I(sbi)->min_ipu_util)
return true;
/*
* IPU for rewrite async pages
*/
if (policy & (0x1 << F2FS_IPU_ASYNC) &&
fio && fio->op == REQ_OP_WRITE &&
!(fio->op_flags & REQ_SYNC) &&
!f2fs_encrypted_inode(inode))
return true;
/* this is only set during fdatasync */
if (policy & (0x1 << F2FS_IPU_FSYNC) &&
is_inode_flag_set(inode, FI_NEED_IPU))
@@ -716,6 +727,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
- (base + 1) + type;
}
static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
unsigned int secno)
{
if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >=
sbi->fggc_threshold)
return true;
return false;
}
static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
{
if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
@@ -727,8 +747,8 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
* It is very important to gather dirty pages and write at once, so that we can
* submit a big bio without interfering other data writes.
* By default, 512 pages for directory data,
* 512 pages (2MB) * 3 for three types of nodes, and
* max_bio_blocks for meta are set.
* 512 pages (2MB) * 8 for nodes, and
* 256 pages * 8 for meta are set.
*/
static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
{
@@ -764,3 +784,28 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
wbc->nr_to_write = desired;
return desired - nr_to_write;
}
static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
bool wakeup = false;
int i;
if (force)
goto wake_up;
mutex_lock(&dcc->cmd_lock);
for (i = MAX_PLIST_NUM - 1;
i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) {
if (!list_empty(&dcc->pend_list[i])) {
wakeup = true;
break;
}
}
mutex_unlock(&dcc->cmd_lock);
if (!wakeup)
return;
wake_up:
dcc->discard_wake = 1;
wake_up_interruptible_all(&dcc->discard_wait_queue);
}

File diff suppressed because it is too large Load Diff

556
fs/f2fs/sysfs.c Normal file
View File

@@ -0,0 +1,556 @@
/*
* f2fs sysfs interface
*
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
* Copyright (c) 2017 Chao Yu <chao@kernel.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
#include "f2fs.h"
#include "segment.h"
#include "gc.h"
static struct proc_dir_entry *f2fs_proc_root;
/* Sysfs support for f2fs */
enum {
GC_THREAD, /* struct f2fs_gc_thread */
SM_INFO, /* struct f2fs_sm_info */
DCC_INFO, /* struct discard_cmd_control */
NM_INFO, /* struct f2fs_nm_info */
F2FS_SBI, /* struct f2fs_sb_info */
#ifdef CONFIG_F2FS_FAULT_INJECTION
FAULT_INFO_RATE, /* struct f2fs_fault_info */
FAULT_INFO_TYPE, /* struct f2fs_fault_info */
#endif
RESERVED_BLOCKS,
};
struct f2fs_attr {
struct attribute attr;
ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
const char *, size_t);
int struct_type;
int offset;
int id;
};
static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
{
if (struct_type == GC_THREAD)
return (unsigned char *)sbi->gc_thread;
else if (struct_type == SM_INFO)
return (unsigned char *)SM_I(sbi);
else if (struct_type == DCC_INFO)
return (unsigned char *)SM_I(sbi)->dcc_info;
else if (struct_type == NM_INFO)
return (unsigned char *)NM_I(sbi);
else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS)
return (unsigned char *)sbi;
#ifdef CONFIG_F2FS_FAULT_INJECTION
else if (struct_type == FAULT_INFO_RATE ||
struct_type == FAULT_INFO_TYPE)
return (unsigned char *)&sbi->fault_info;
#endif
return NULL;
}
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
struct super_block *sb = sbi->sb;
if (!sb->s_bdev->bd_part)
return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)(sbi->kbytes_written +
BD_PART_WRITTEN(sbi)));
}
static ssize_t features_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
struct super_block *sb = sbi->sb;
int len = 0;
if (!sb->s_bdev->bd_part)
return snprintf(buf, PAGE_SIZE, "0\n");
if (f2fs_sb_has_crypto(sb))
len += snprintf(buf, PAGE_SIZE - len, "%s",
"encryption");
if (f2fs_sb_mounted_blkzoned(sb))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "blkzoned");
if (f2fs_sb_has_extra_attr(sb))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "extra_attr");
if (f2fs_sb_has_project_quota(sb))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "projquota");
if (f2fs_sb_has_inode_chksum(sb))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_checksum");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
return len;
}
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
unsigned char *ptr = NULL;
unsigned int *ui;
ptr = __struct_ptr(sbi, a->struct_type);
if (!ptr)
return -EINVAL;
ui = (unsigned int *)(ptr + a->offset);
return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
}
static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
struct f2fs_sb_info *sbi,
const char *buf, size_t count)
{
unsigned char *ptr;
unsigned long t;
unsigned int *ui;
ssize_t ret;
ptr = __struct_ptr(sbi, a->struct_type);
if (!ptr)
return -EINVAL;
ui = (unsigned int *)(ptr + a->offset);
ret = kstrtoul(skip_spaces(buf), 0, &t);
if (ret < 0)
return ret;
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
return -EINVAL;
#endif
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
if ((unsigned long)sbi->total_valid_block_count + t >
(unsigned long)sbi->user_block_count) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
*ui = t;
spin_unlock(&sbi->stat_lock);
return count;
}
if (!strcmp(a->attr.name, "discard_granularity")) {
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
int i;
if (t == 0 || t > MAX_PLIST_NUM)
return -EINVAL;
if (t == *ui)
return count;
mutex_lock(&dcc->cmd_lock);
for (i = 0; i < MAX_PLIST_NUM; i++) {
if (i >= t - 1)
dcc->pend_list_tag[i] |= P_ACTIVE;
else
dcc->pend_list_tag[i] &= (~P_ACTIVE);
}
mutex_unlock(&dcc->cmd_lock);
*ui = t;
return count;
}
*ui = t;
if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
f2fs_reset_iostat(sbi);
if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) {
sbi->gc_thread->gc_wake = 1;
wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head);
wake_up_discard_thread(sbi, true);
}
return count;
}
static ssize_t f2fs_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
s_kobj);
struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
return a->show ? a->show(a, sbi, buf) : 0;
}
static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t len)
{
struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
s_kobj);
struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
return a->store ? a->store(a, sbi, buf, len) : 0;
}
static void f2fs_sb_release(struct kobject *kobj)
{
struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
s_kobj);
complete(&sbi->s_kobj_unregister);
}
enum feat_id {
FEAT_CRYPTO = 0,
FEAT_BLKZONED,
FEAT_ATOMIC_WRITE,
FEAT_EXTRA_ATTR,
FEAT_PROJECT_QUOTA,
FEAT_INODE_CHECKSUM,
};
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
switch (a->id) {
case FEAT_CRYPTO:
case FEAT_BLKZONED:
case FEAT_ATOMIC_WRITE:
case FEAT_EXTRA_ATTR:
case FEAT_PROJECT_QUOTA:
case FEAT_INODE_CHECKSUM:
return snprintf(buf, PAGE_SIZE, "supported\n");
}
return 0;
}
#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
static struct f2fs_attr f2fs_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
.show = _show, \
.store = _store, \
.struct_type = _struct_type, \
.offset = _offset \
}
#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
F2FS_ATTR_OFFSET(struct_type, name, 0644, \
f2fs_sbi_show, f2fs_sbi_store, \
offsetof(struct struct_name, elname))
#define F2FS_GENERAL_RO_ATTR(name) \
static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
#define F2FS_FEATURE_RO_ATTR(_name, _id) \
static struct f2fs_attr f2fs_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = 0444 }, \
.show = f2fs_feature_show, \
.id = _id, \
}
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time,
urgent_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
#ifdef CONFIG_F2FS_FAULT_INJECTION
F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
#endif
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
#ifdef CONFIG_F2FS_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
#endif
#ifdef CONFIG_BLK_DEV_ZONED
F2FS_FEATURE_RO_ATTR(block_zoned, FEAT_BLKZONED);
#endif
F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE);
F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR);
F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA);
F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_urgent_sleep_time),
ATTR_LIST(gc_min_sleep_time),
ATTR_LIST(gc_max_sleep_time),
ATTR_LIST(gc_no_gc_sleep_time),
ATTR_LIST(gc_idle),
ATTR_LIST(gc_urgent),
ATTR_LIST(reclaim_segments),
ATTR_LIST(max_small_discards),
ATTR_LIST(discard_granularity),
ATTR_LIST(batched_trim_sections),
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(dirty_nats_ratio),
ATTR_LIST(cp_interval),
ATTR_LIST(idle_interval),
ATTR_LIST(iostat_enable),
#ifdef CONFIG_F2FS_FAULT_INJECTION
ATTR_LIST(inject_rate),
ATTR_LIST(inject_type),
#endif
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
NULL,
};
static struct attribute *f2fs_feat_attrs[] = {
#ifdef CONFIG_F2FS_FS_ENCRYPTION
ATTR_LIST(encryption),
#endif
#ifdef CONFIG_BLK_DEV_ZONED
ATTR_LIST(block_zoned),
#endif
ATTR_LIST(atomic_write),
ATTR_LIST(extra_attr),
ATTR_LIST(project_quota),
ATTR_LIST(inode_checksum),
NULL,
};
static const struct sysfs_ops f2fs_attr_ops = {
.show = f2fs_attr_show,
.store = f2fs_attr_store,
};
static struct kobj_type f2fs_sb_ktype = {
.default_attrs = f2fs_attrs,
.sysfs_ops = &f2fs_attr_ops,
.release = f2fs_sb_release,
};
static struct kobj_type f2fs_ktype = {
.sysfs_ops = &f2fs_attr_ops,
};
static struct kset f2fs_kset = {
.kobj = {.ktype = &f2fs_ktype},
};
static struct kobj_type f2fs_feat_ktype = {
.default_attrs = f2fs_feat_attrs,
.sysfs_ops = &f2fs_attr_ops,
};
static struct kobject f2fs_feat = {
.kset = &f2fs_kset,
};
static int segment_info_seq_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
unsigned int total_segs =
le32_to_cpu(sbi->raw_super->segment_count_main);
int i;
seq_puts(seq, "format: segment_type|valid_blocks\n"
"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
for (i = 0; i < total_segs; i++) {
struct seg_entry *se = get_seg_entry(sbi, i);
if ((i % 10) == 0)
seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d|%-3u", se->type,
get_valid_blocks(sbi, i, false));
if ((i % 10) == 9 || i == (total_segs - 1))
seq_putc(seq, '\n');
else
seq_putc(seq, ' ');
}
return 0;
}
static int segment_bits_seq_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
unsigned int total_segs =
le32_to_cpu(sbi->raw_super->segment_count_main);
int i, j;
seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
for (i = 0; i < total_segs; i++) {
struct seg_entry *se = get_seg_entry(sbi, i);
seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d|%-3u|", se->type,
get_valid_blocks(sbi, i, false));
for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
seq_printf(seq, " %.2x", se->cur_valid_map[j]);
seq_putc(seq, '\n');
}
return 0;
}
static int iostat_info_seq_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
time64_t now = ktime_get_real_seconds();
if (!sbi->iostat_enable)
return 0;
seq_printf(seq, "time: %-16llu\n", now);
/* print app IOs */
seq_printf(seq, "app buffered: %-16llu\n",
sbi->write_iostat[APP_BUFFERED_IO]);
seq_printf(seq, "app direct: %-16llu\n",
sbi->write_iostat[APP_DIRECT_IO]);
seq_printf(seq, "app mapped: %-16llu\n",
sbi->write_iostat[APP_MAPPED_IO]);
/* print fs IOs */
seq_printf(seq, "fs data: %-16llu\n",
sbi->write_iostat[FS_DATA_IO]);
seq_printf(seq, "fs node: %-16llu\n",
sbi->write_iostat[FS_NODE_IO]);
seq_printf(seq, "fs meta: %-16llu\n",
sbi->write_iostat[FS_META_IO]);
seq_printf(seq, "fs gc data: %-16llu\n",
sbi->write_iostat[FS_GC_DATA_IO]);
seq_printf(seq, "fs gc node: %-16llu\n",
sbi->write_iostat[FS_GC_NODE_IO]);
seq_printf(seq, "fs cp data: %-16llu\n",
sbi->write_iostat[FS_CP_DATA_IO]);
seq_printf(seq, "fs cp node: %-16llu\n",
sbi->write_iostat[FS_CP_NODE_IO]);
seq_printf(seq, "fs cp meta: %-16llu\n",
sbi->write_iostat[FS_CP_META_IO]);
seq_printf(seq, "fs discard: %-16llu\n",
sbi->write_iostat[FS_DISCARD]);
return 0;
}
#define F2FS_PROC_FILE_DEF(_name) \
static int _name##_open_fs(struct inode *inode, struct file *file) \
{ \
return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
} \
\
static const struct file_operations f2fs_seq_##_name##_fops = { \
.open = _name##_open_fs, \
.read = seq_read, \
.llseek = seq_lseek, \
.release = single_release, \
};
F2FS_PROC_FILE_DEF(segment_info);
F2FS_PROC_FILE_DEF(segment_bits);
F2FS_PROC_FILE_DEF(iostat_info);
int __init f2fs_init_sysfs(void)
{
int ret;
kobject_set_name(&f2fs_kset.kobj, "f2fs");
f2fs_kset.kobj.parent = fs_kobj;
ret = kset_register(&f2fs_kset);
if (ret)
return ret;
ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype,
NULL, "features");
if (ret)
kset_unregister(&f2fs_kset);
else
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
return ret;
}
void f2fs_exit_sysfs(void)
{
kobject_put(&f2fs_feat);
kset_unregister(&f2fs_kset);
remove_proc_entry("fs/f2fs", NULL);
f2fs_proc_root = NULL;
}
int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
{
struct super_block *sb = sbi->sb;
int err;
sbi->s_kobj.kset = &f2fs_kset;
init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL,
"%s", sb->s_id);
if (err)
return err;
if (f2fs_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
if (sbi->s_proc) {
proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
&f2fs_seq_segment_info_fops, sb);
proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
&f2fs_seq_segment_bits_fops, sb);
proc_create_data("iostat_info", S_IRUGO, sbi->s_proc,
&f2fs_seq_iostat_info_fops, sb);
}
return 0;
}
void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
{
if (sbi->s_proc) {
remove_proc_entry("iostat_info", sbi->s_proc);
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry("segment_bits", sbi->s_proc);
remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
}
kobject_del(&sbi->s_kobj);
}

View File

@@ -59,7 +59,7 @@ void f2fs_trace_pid(struct page *page)
pid_t pid = task_pid_nr(current);
void *p;
page->private = pid;
set_page_private(page, (unsigned long)pid);
if (radix_tree_preload(GFP_NOFS))
return;
@@ -138,7 +138,7 @@ static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index,
radix_tree_for_each_slot(slot, &pids, &iter, first_index) {
results[ret] = iter.index;
if (++ret == PIDVEC_SIZE)
if (++ret == max_items)
break;
}
return ret;

View File

@@ -264,18 +264,123 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
return entry;
}
static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
void **last_addr, int index,
size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2;
list_for_each_xattr(entry, base_addr) {
if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
(void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
base_addr + inline_size) {
*last_addr = entry;
return NULL;
}
if (entry->e_name_index != index)
continue;
if (entry->e_name_len != len)
continue;
if (!memcmp(entry->e_name, name, len))
break;
}
return entry;
}
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
void **base_addr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
void *cur_addr, *txattr_addr, *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
unsigned int inline_size = inline_xattr_size(inode);
int err = 0;
if (!size && !inline_size)
return -ENODATA;
txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE,
GFP_F2FS_ZERO);
if (!txattr_addr)
return -ENOMEM;
/* read from inline xattr */
if (inline_size) {
struct page *page = NULL;
void *inline_addr;
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out;
}
inline_addr = inline_xattr_addr(page);
}
memcpy(txattr_addr, inline_addr, inline_size);
f2fs_put_page(page, 1);
*xe = __find_inline_xattr(txattr_addr, &last_addr,
index, len, name);
if (*xe)
goto check;
}
/* read from xattr node block */
if (xnid) {
struct page *xpage;
void *xattr_addr;
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, xnid);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
goto out;
}
xattr_addr = page_address(xpage);
memcpy(txattr_addr + inline_size, xattr_addr, size);
f2fs_put_page(xpage, 1);
}
if (last_addr)
cur_addr = XATTR_HDR(last_addr) - 1;
else
cur_addr = txattr_addr;
*xe = __find_xattr(cur_addr, index, len, name);
check:
if (IS_XATTR_LAST_ENTRY(*xe)) {
err = -ENODATA;
goto out;
}
*base_addr = txattr_addr;
return 0;
out:
kzfree(txattr_addr);
return err;
}
static int read_all_xattrs(struct inode *inode, struct page *ipage,
void **base_addr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_xattr_header *header;
size_t size = PAGE_SIZE, inline_size = 0;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = VALID_XATTR_BLOCK_SIZE;
unsigned int inline_size = inline_xattr_size(inode);
void *txattr_addr;
int err;
inline_size = inline_xattr_size(inode);
txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE,
GFP_F2FS_ZERO);
if (!txattr_addr)
return -ENOMEM;
@@ -299,19 +404,19 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
}
/* read from xattr node block */
if (F2FS_I(inode)->i_xattr_nid) {
if (xnid) {
struct page *xpage;
void *xattr_addr;
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
xpage = get_node_page(sbi, xnid);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
goto fail;
}
xattr_addr = page_address(xpage);
memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
memcpy(txattr_addr + inline_size, xattr_addr, size);
f2fs_put_page(xpage, 1);
}
@@ -333,14 +438,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
void *txattr_addr, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
size_t inline_size = 0;
size_t inline_size = inline_xattr_size(inode);
void *xattr_addr;
struct page *xpage;
nid_t new_nid = 0;
int err;
inline_size = inline_xattr_size(inode);
if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
if (!alloc_nid(sbi, &new_nid))
return -ENOSPC;
@@ -386,7 +489,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage);
xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(xpage);
@@ -395,23 +498,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
xattr_addr = page_address(xpage);
memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
sizeof(struct node_footer));
memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE);
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
/* need to checkpoint during fsync */
F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
return 0;
}
int f2fs_getxattr(struct inode *inode, int index, const char *name,
void *buffer, size_t buffer_size, struct page *ipage)
{
struct f2fs_xattr_entry *entry;
void *base_addr;
struct f2fs_xattr_entry *entry = NULL;
int error = 0;
size_t size, len;
unsigned int size, len;
void *base_addr = NULL;
if (name == NULL)
return -EINVAL;
@@ -420,21 +520,18 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
error = read_all_xattrs(inode, ipage, &base_addr);
down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
&entry, &base_addr);
up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
entry = __find_xattr(base_addr, index, len, name);
if (IS_XATTR_LAST_ENTRY(entry)) {
error = -ENODATA;
goto cleanup;
}
size = le16_to_cpu(entry->e_value_size);
if (buffer && size > buffer_size) {
error = -ERANGE;
goto cleanup;
goto out;
}
if (buffer) {
@@ -442,8 +539,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
memcpy(buffer, pval, size);
}
error = size;
cleanup:
out:
kzfree(base_addr);
return error;
}
@@ -456,7 +552,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
int error = 0;
size_t rest = buffer_size;
down_read(&F2FS_I(inode)->i_xattr_sem);
error = read_all_xattrs(inode, NULL, &base_addr);
up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -485,6 +583,15 @@ cleanup:
return error;
}
static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry,
const void *value, size_t size)
{
void *pval = entry->e_name + entry->e_name_len;
return (le16_to_cpu(entry->e_value_size) == size) &&
!memcmp(pval, value, size);
}
static int __f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size,
struct page *ipage, int flags)
@@ -519,12 +626,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
if ((flags & XATTR_REPLACE) && !found) {
if (found) {
if ((flags & XATTR_CREATE)) {
error = -EEXIST;
goto exit;
}
if (f2fs_xattr_value_same(here, value, size))
goto exit;
} else if ((flags & XATTR_REPLACE)) {
error = -ENODATA;
goto exit;
} else if ((flags & XATTR_CREATE) && found) {
error = -EEXIST;
goto exit;
}
last = here;
@@ -618,7 +730,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
f2fs_lock_op(sbi);
/* protect xattr_ver */
down_write(&F2FS_I(inode)->i_sem);
down_write(&F2FS_I(inode)->i_xattr_sem);
err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
up_write(&F2FS_I(inode)->i_xattr_sem);
up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);

View File

@@ -58,10 +58,10 @@ struct f2fs_xattr_entry {
#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1))
#define XATTR_ROUND (3)
#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND)
#define XATTR_ALIGN(size) (((size) + XATTR_ROUND) & ~XATTR_ROUND)
#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \
entry->e_name_len + le16_to_cpu(entry->e_value_size)))
(entry)->e_name_len + le16_to_cpu((entry)->e_value_size)))
#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\
ENTRY_SIZE(entry)))
@@ -72,9 +72,10 @@ struct f2fs_xattr_entry {
for (entry = XATTR_FIRST_ENTRY(addr);\
!IS_XATTR_LAST_ENTRY(entry);\
entry = XATTR_NEXT_ENTRY(entry))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \
sizeof(struct node_footer) - sizeof(__u32))
#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define XATTR_PADDING_SIZE (sizeof(__u32))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
sizeof(struct f2fs_xattr_header) - \

View File

@@ -114,6 +114,8 @@ struct f2fs_super_block {
/*
* For checkpoint
*/
#define CP_TRIMMED_FLAG 0x00000100
#define CP_NAT_BITS_FLAG 0x00000080
#define CP_CRC_RECOVERY_FLAG 0x00000040
#define CP_FASTBOOT_FLAG 0x00000020
#define CP_FSCK_FLAG 0x00000010
@@ -184,6 +186,8 @@ struct f2fs_extent {
#define F2FS_NAME_LEN 255
#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */
#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \
get_extra_isize(inode))
#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */
#define ADDRS_PER_INODE(inode) addrs_per_inode(inode)
#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
@@ -203,9 +207,7 @@ struct f2fs_extent {
#define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */
#define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */
#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */
#define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \
F2FS_INLINE_XATTR_ADDRS - 1))
#define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */
struct f2fs_inode {
__le16 i_mode; /* file mode */
@@ -233,8 +235,16 @@ struct f2fs_inode {
struct f2fs_extent i_ext; /* caching a largest extent */
__le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */
union {
struct {
__le16 i_extra_isize; /* extra inode attribute size */
__le16 i_padding; /* padding */
__le32 i_projid; /* project id */
__le32 i_inode_checksum;/* inode meta checksum */
__le32 i_extra_end[0]; /* for attribute size calculation */
};
__le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */
};
__le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2),
double_indirect(1) node id */
} __packed;
@@ -278,6 +288,7 @@ struct f2fs_node {
* For NAT entries
*/
#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8)
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
@@ -462,7 +473,7 @@ typedef __le32 f2fs_hash_t;
#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
/*
* space utilization of regular dentry and inline dentry
* space utilization of regular dentry and inline dentry (w/o extra reservation)
* regular dentry inline dentry
* bitmap 1 * 27 = 27 1 * 23 = 23
* reserved 1 * 3 = 3 1 * 7 = 7
@@ -498,24 +509,6 @@ struct f2fs_dentry_block {
__u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN];
} __packed;
/* for inline dir */
#define NR_INLINE_DENTRY (MAX_INLINE_DATA * BITS_PER_BYTE / \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
BITS_PER_BYTE + 1))
#define INLINE_DENTRY_BITMAP_SIZE ((NR_INLINE_DENTRY + \
BITS_PER_BYTE - 1) / BITS_PER_BYTE)
#define INLINE_RESERVED_SIZE (MAX_INLINE_DATA - \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
NR_INLINE_DENTRY + INLINE_DENTRY_BITMAP_SIZE))
/* inline directory entry structure */
struct f2fs_inline_dentry {
__u8 dentry_bitmap[INLINE_DENTRY_BITMAP_SIZE];
__u8 reserved[INLINE_RESERVED_SIZE];
struct f2fs_dir_entry dentry[NR_INLINE_DENTRY];
__u8 filename[NR_INLINE_DENTRY][F2FS_SLOT_LEN];
} __packed;
/* file types used in inode_info->flags */
enum {
F2FS_FT_UNKNOWN,
@@ -531,4 +524,6 @@ enum {
#define S_SHIFT 12
#define F2FS_DEF_PROJID 0 /* default project ID */
#endif /* _LINUX_F2FS_FS_H */

View File

@@ -0,0 +1,138 @@
/*
* fscrypt_common.h: common declarations for per-file encryption
*
* Copyright (C) 2015, Google, Inc.
*
* Written by Michael Halcrow, 2015.
* Modified by Jaegeuk Kim, 2015.
*/
#ifndef _LINUX_FSCRYPT_COMMON_H
#define _LINUX_FSCRYPT_COMMON_H
#include <linux/key.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/dcache.h>
#include <crypto/skcipher.h>
#include <uapi/linux/fs.h>
#define FS_CRYPTO_BLOCK_SIZE 16
struct fscrypt_info;
struct fscrypt_ctx {
union {
struct {
struct page *bounce_page; /* Ciphertext page */
struct page *control_page; /* Original page */
} w;
struct {
struct bio *bio;
struct work_struct work;
} r;
struct list_head free_list; /* Free list */
};
u8 flags; /* Flags */
};
/**
* For encrypted symlinks, the ciphertext length is stored at the beginning
* of the string in little-endian format.
*/
struct fscrypt_symlink_data {
__le16 len;
char encrypted_path[1];
} __packed;
struct fscrypt_str {
unsigned char *name;
u32 len;
};
struct fscrypt_name {
const struct qstr *usr_fname;
struct fscrypt_str disk_name;
u32 hash;
u32 minor_hash;
struct fscrypt_str crypto_buf;
};
#define FSTR_INIT(n, l) { .name = n, .len = l }
#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
#define fname_name(p) ((p)->disk_name.name)
#define fname_len(p) ((p)->disk_name.len)
/*
* fscrypt superblock flags
*/
#define FS_CFLG_OWN_PAGES (1U << 1)
/*
* crypto opertions for filesystems
*/
struct fscrypt_operations {
unsigned int flags;
const char *key_prefix;
int (*get_context)(struct inode *, void *, size_t);
int (*set_context)(struct inode *, const void *, size_t, void *);
int (*dummy_context)(struct inode *);
bool (*is_encrypted)(struct inode *);
bool (*empty_dir)(struct inode *);
unsigned (*max_namelen)(struct inode *);
};
static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
{
if (inode->i_sb->s_cop->dummy_context &&
inode->i_sb->s_cop->dummy_context(inode))
return true;
return false;
}
static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
u32 filenames_mode)
{
if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC &&
filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS)
return true;
if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS &&
filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
return true;
return false;
}
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
{
if (str->len == 1 && str->name[0] == '.')
return true;
if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
return true;
return false;
}
static inline struct page *fscrypt_control_page(struct page *page)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
#else
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
#endif
}
static inline int fscrypt_has_encryption_key(const struct inode *inode)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
return (inode->i_crypt_info != NULL);
#else
return 0;
#endif
}
#endif /* _LINUX_FSCRYPT_COMMON_H */

View File

@@ -0,0 +1,177 @@
/*
* fscrypt_notsupp.h
*
* This stubs out the fscrypt functions for filesystems configured without
* encryption support.
*/
#ifndef _LINUX_FSCRYPT_NOTSUPP_H
#define _LINUX_FSCRYPT_NOTSUPP_H
#include <linux/fscrypt_common.h>
/* crypto.c */
static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode,
gfp_t gfp_flags)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
{
return;
}
static inline struct page *fscrypt_encrypt_page(const struct inode *inode,
struct page *page,
unsigned int len,
unsigned int offs,
u64 lblk_num, gfp_t gfp_flags)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline int fscrypt_decrypt_page(const struct inode *inode,
struct page *page,
unsigned int len, unsigned int offs,
u64 lblk_num)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_restore_control_page(struct page *page)
{
return;
}
static inline void fscrypt_set_d_op(struct dentry *dentry)
{
return;
}
static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
{
return;
}
/* policy.c */
static inline int fscrypt_ioctl_set_policy(struct file *filp,
const void __user *arg)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_has_permitted_context(struct inode *parent,
struct inode *child)
{
return 0;
}
static inline int fscrypt_inherit_context(struct inode *parent,
struct inode *child,
void *fs_data, bool preload)
{
return -EOPNOTSUPP;
}
/* keyinfo.c */
static inline int fscrypt_get_encryption_info(struct inode *inode)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_put_encryption_info(struct inode *inode,
struct fscrypt_info *ci)
{
return;
}
/* fname.c */
static inline int fscrypt_setup_filename(struct inode *dir,
const struct qstr *iname,
int lookup, struct fscrypt_name *fname)
{
if (dir->i_sb->s_cop->is_encrypted(dir))
return -EOPNOTSUPP;
memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *)iname->name;
fname->disk_name.len = iname->len;
return 0;
}
static inline void fscrypt_free_filename(struct fscrypt_name *fname)
{
return;
}
static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode,
u32 ilen)
{
/* never happens */
WARN_ON(1);
return 0;
}
static inline int fscrypt_fname_alloc_buffer(const struct inode *inode,
u32 ilen,
struct fscrypt_str *crypto_str)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
{
return;
}
static inline int fscrypt_fname_disk_to_usr(struct inode *inode,
u32 hash, u32 minor_hash,
const struct fscrypt_str *iname,
struct fscrypt_str *oname)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
struct fscrypt_str *oname)
{
return -EOPNOTSUPP;
}
static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
const u8 *de_name, u32 de_name_len)
{
/* Encryption support disabled; use standard comparison */
if (de_name_len != fname->disk_name.len)
return false;
return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len);
}
/* bio.c */
static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx,
struct bio *bio)
{
return;
}
static inline void fscrypt_pullback_bio_page(struct page **page, bool restore)
{
return;
}
static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
return -EOPNOTSUPP;
}
#endif /* _LINUX_FSCRYPT_NOTSUPP_H */

View File

@@ -0,0 +1,145 @@
/*
* fscrypt_supp.h
*
* This is included by filesystems configured with encryption support.
*/
#ifndef _LINUX_FSCRYPT_SUPP_H
#define _LINUX_FSCRYPT_SUPP_H
#include <linux/fscrypt_common.h>
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
extern void fscrypt_release_ctx(struct fscrypt_ctx *);
extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
unsigned int, unsigned int,
u64, gfp_t);
extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
unsigned int, u64);
extern void fscrypt_restore_control_page(struct page *);
extern const struct dentry_operations fscrypt_d_ops;
static inline void fscrypt_set_d_op(struct dentry *dentry)
{
d_set_d_op(dentry, &fscrypt_d_ops);
}
static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
{
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
spin_unlock(&dentry->d_lock);
}
/* policy.c */
extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
extern int fscrypt_inherit_context(struct inode *, struct inode *,
void *, bool);
/* keyinfo.c */
extern int fscrypt_get_encryption_info(struct inode *);
extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
/* fname.c */
extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
int lookup, struct fscrypt_name *);
static inline void fscrypt_free_filename(struct fscrypt_name *fname)
{
kfree(fname->crypto_buf.name);
}
extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32);
extern int fscrypt_fname_alloc_buffer(const struct inode *, u32,
struct fscrypt_str *);
extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
const struct fscrypt_str *, struct fscrypt_str *);
extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
struct fscrypt_str *);
#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32
/* Extracts the second-to-last ciphertext block; see explanation below */
#define FSCRYPT_FNAME_DIGEST(name, len) \
((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \
FS_CRYPTO_BLOCK_SIZE))
#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE
/**
* fscrypt_digested_name - alternate identifier for an on-disk filename
*
* When userspace lists an encrypted directory without access to the key,
* filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE
* bytes are shown in this abbreviated form (base64-encoded) rather than as the
* full ciphertext (base64-encoded). This is necessary to allow supporting
* filenames up to NAME_MAX bytes, since base64 encoding expands the length.
*
* To make it possible for filesystems to still find the correct directory entry
* despite not knowing the full on-disk name, we encode any filesystem-specific
* 'hash' and/or 'minor_hash' which the filesystem may need for its lookups,
* followed by the second-to-last ciphertext block of the filename. Due to the
* use of the CBC-CTS encryption mode, the second-to-last ciphertext block
* depends on the full plaintext. (Note that ciphertext stealing causes the
* last two blocks to appear "flipped".) This makes accidental collisions very
* unlikely: just a 1 in 2^128 chance for two filenames to collide even if they
* share the same filesystem-specific hashes.
*
* However, this scheme isn't immune to intentional collisions, which can be
* created by anyone able to create arbitrary plaintext filenames and view them
* without the key. Making the "digest" be a real cryptographic hash like
* SHA-256 over the full ciphertext would prevent this, although it would be
* less efficient and harder to implement, especially since the filesystem would
* need to calculate it for each directory entry examined during a search.
*/
struct fscrypt_digested_name {
u32 hash;
u32 minor_hash;
u8 digest[FSCRYPT_FNAME_DIGEST_SIZE];
};
/**
* fscrypt_match_name() - test whether the given name matches a directory entry
* @fname: the name being searched for
* @de_name: the name from the directory entry
* @de_name_len: the length of @de_name in bytes
*
* Normally @fname->disk_name will be set, and in that case we simply compare
* that to the name stored in the directory entry. The only exception is that
* if we don't have the key for an encrypted directory and a filename in it is
* very long, then we won't have the full disk_name and we'll instead need to
* match against the fscrypt_digested_name.
*
* Return: %true if the name matches, otherwise %false.
*/
static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
const u8 *de_name, u32 de_name_len)
{
if (unlikely(!fname->disk_name.name)) {
const struct fscrypt_digested_name *n =
(const void *)fname->crypto_buf.name;
if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_'))
return false;
if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)
return false;
return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len),
n->digest, FSCRYPT_FNAME_DIGEST_SIZE);
}
if (de_name_len != fname->disk_name.len)
return false;
return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len);
}
/* bio.c */
extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
extern void fscrypt_pullback_bio_page(struct page **, bool);
extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
unsigned int);
#endif /* _LINUX_FSCRYPT_SUPP_H */

View File

@@ -250,8 +250,8 @@ extern void fscrypt_restore_control_page(struct page *);
extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t,
unsigned int);
/* policy.c */
extern int fscrypt_process_policy(struct file *, const struct fscrypt_policy *);
extern int fscrypt_get_policy(struct inode *, struct fscrypt_policy *);
extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
extern int fscrypt_inherit_context(struct inode *, struct inode *,
void *, bool);
@@ -320,14 +320,14 @@ static inline int fscrypt_notsupp_zeroout_range(struct inode *i, pgoff_t p,
}
/* policy.c */
static inline int fscrypt_notsupp_process_policy(struct file *f,
const struct fscrypt_policy *p)
static inline int fscrypt_notsupp_ioctl_set_policy(struct file *f,
const void __user *arg)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_notsupp_get_policy(struct inode *i,
struct fscrypt_policy *p)
static inline int fscrypt_notsupp_ioctl_get_policy(struct file *f,
void __user *arg)
{
return -EOPNOTSUPP;
}

View File

@@ -6,8 +6,8 @@
#include <linux/tracepoint.h>
#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev)
#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino
#define show_dev(dev) MAJOR(dev), MINOR(dev)
#define show_dev_ino(entry) show_dev(entry->dev), (unsigned long)entry->ino
TRACE_DEFINE_ENUM(NODE);
TRACE_DEFINE_ENUM(DATA);
@@ -15,6 +15,7 @@ TRACE_DEFINE_ENUM(META);
TRACE_DEFINE_ENUM(META_FLUSH);
TRACE_DEFINE_ENUM(INMEM);
TRACE_DEFINE_ENUM(INMEM_DROP);
TRACE_DEFINE_ENUM(INMEM_INVALIDATE);
TRACE_DEFINE_ENUM(IPU);
TRACE_DEFINE_ENUM(OPU);
TRACE_DEFINE_ENUM(CURSEG_HOT_DATA);
@@ -43,6 +44,7 @@ TRACE_DEFINE_ENUM(CP_FASTBOOT);
TRACE_DEFINE_ENUM(CP_SYNC);
TRACE_DEFINE_ENUM(CP_RECOVERY);
TRACE_DEFINE_ENUM(CP_DISCARD);
TRACE_DEFINE_ENUM(CP_TRIMMED);
#define show_block_type(type) \
__print_symbolic(type, \
@@ -52,6 +54,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
{ META_FLUSH, "META_FLUSH" }, \
{ INMEM, "INMEM" }, \
{ INMEM_DROP, "INMEM_DROP" }, \
{ INMEM_INVALIDATE, "INMEM_INVALIDATE" }, \
{ INMEM_REVOKE, "INMEM_REVOKE" }, \
{ IPU, "IN-PLACE" }, \
{ OPU, "OUT-OF-PLACE" })
@@ -80,6 +83,12 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
{ REQ_META | REQ_PRIO, "(MP)" }, \
{ 0, " \b" })
#define show_block_temp(temp) \
__print_symbolic(temp, \
{ HOT, "HOT" }, \
{ WARM, "WARM" }, \
{ COLD, "COLD" })
#define show_data_type(type) \
__print_symbolic(type, \
{ CURSEG_HOT_DATA, "Hot DATA" }, \
@@ -116,7 +125,8 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
{ CP_FASTBOOT, "Fastboot" }, \
{ CP_SYNC, "Sync" }, \
{ CP_RECOVERY, "Recovery" }, \
{ CP_DISCARD, "Discard" })
{ CP_DISCARD, "Discard" }, \
{ CP_UMOUNT | CP_TRIMMED, "Umount,Trimmed" })
struct victim_sel_policy;
struct f2fs_map_blocks;
@@ -239,7 +249,7 @@ TRACE_EVENT(f2fs_sync_fs,
),
TP_printk("dev = (%d,%d), superblock is %s, wait = %d",
show_dev(__entry),
show_dev(__entry->dev),
__entry->dirty ? "dirty" : "not dirty",
__entry->wait)
);
@@ -309,6 +319,13 @@ DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit,
TP_ARGS(inode, ret)
);
DEFINE_EVENT(f2fs__inode_exit, f2fs_drop_inode,
TP_PROTO(struct inode *inode, int ret),
TP_ARGS(inode, ret)
);
DEFINE_EVENT(f2fs__inode, f2fs_truncate,
TP_PROTO(struct inode *inode),
@@ -518,14 +535,14 @@ TRACE_EVENT(f2fs_map_blocks,
TRACE_EVENT(f2fs_background_gc,
TP_PROTO(struct super_block *sb, long wait_ms,
TP_PROTO(struct super_block *sb, unsigned int wait_ms,
unsigned int prefree, unsigned int free),
TP_ARGS(sb, wait_ms, prefree, free),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(long, wait_ms)
__field(unsigned int, wait_ms)
__field(unsigned int, prefree)
__field(unsigned int, free)
),
@@ -537,13 +554,120 @@ TRACE_EVENT(f2fs_background_gc,
__entry->free = free;
),
TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u",
show_dev(__entry),
TP_printk("dev = (%d,%d), wait_ms = %u, prefree = %u, free = %u",
show_dev(__entry->dev),
__entry->wait_ms,
__entry->prefree,
__entry->free)
);
TRACE_EVENT(f2fs_gc_begin,
TP_PROTO(struct super_block *sb, bool sync, bool background,
long long dirty_nodes, long long dirty_dents,
long long dirty_imeta, unsigned int free_sec,
unsigned int free_seg, int reserved_seg,
unsigned int prefree_seg),
TP_ARGS(sb, sync, background, dirty_nodes, dirty_dents, dirty_imeta,
free_sec, free_seg, reserved_seg, prefree_seg),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(bool, sync)
__field(bool, background)
__field(long long, dirty_nodes)
__field(long long, dirty_dents)
__field(long long, dirty_imeta)
__field(unsigned int, free_sec)
__field(unsigned int, free_seg)
__field(int, reserved_seg)
__field(unsigned int, prefree_seg)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->sync = sync;
__entry->background = background;
__entry->dirty_nodes = dirty_nodes;
__entry->dirty_dents = dirty_dents;
__entry->dirty_imeta = dirty_imeta;
__entry->free_sec = free_sec;
__entry->free_seg = free_seg;
__entry->reserved_seg = reserved_seg;
__entry->prefree_seg = prefree_seg;
),
TP_printk("dev = (%d,%d), sync = %d, background = %d, nodes = %lld, "
"dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
"rsv_seg:%d, prefree_seg:%u",
show_dev(__entry->dev),
__entry->sync,
__entry->background,
__entry->dirty_nodes,
__entry->dirty_dents,
__entry->dirty_imeta,
__entry->free_sec,
__entry->free_seg,
__entry->reserved_seg,
__entry->prefree_seg)
);
TRACE_EVENT(f2fs_gc_end,
TP_PROTO(struct super_block *sb, int ret, int seg_freed,
int sec_freed, long long dirty_nodes,
long long dirty_dents, long long dirty_imeta,
unsigned int free_sec, unsigned int free_seg,
int reserved_seg, unsigned int prefree_seg),
TP_ARGS(sb, ret, seg_freed, sec_freed, dirty_nodes, dirty_dents,
dirty_imeta, free_sec, free_seg, reserved_seg, prefree_seg),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, ret)
__field(int, seg_freed)
__field(int, sec_freed)
__field(long long, dirty_nodes)
__field(long long, dirty_dents)
__field(long long, dirty_imeta)
__field(unsigned int, free_sec)
__field(unsigned int, free_seg)
__field(int, reserved_seg)
__field(unsigned int, prefree_seg)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->ret = ret;
__entry->seg_freed = seg_freed;
__entry->sec_freed = sec_freed;
__entry->dirty_nodes = dirty_nodes;
__entry->dirty_dents = dirty_dents;
__entry->dirty_imeta = dirty_imeta;
__entry->free_sec = free_sec;
__entry->free_seg = free_seg;
__entry->reserved_seg = reserved_seg;
__entry->prefree_seg = prefree_seg;
),
TP_printk("dev = (%d,%d), ret = %d, seg_freed = %d, sec_freed = %d, "
"nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, "
"free_seg:%u, rsv_seg:%d, prefree_seg:%u",
show_dev(__entry->dev),
__entry->ret,
__entry->seg_freed,
__entry->sec_freed,
__entry->dirty_nodes,
__entry->dirty_dents,
__entry->dirty_imeta,
__entry->free_sec,
__entry->free_seg,
__entry->reserved_seg,
__entry->prefree_seg)
);
TRACE_EVENT(f2fs_get_victim,
TP_PROTO(struct super_block *sb, int type, int gc_type,
@@ -580,7 +704,7 @@ TRACE_EVENT(f2fs_get_victim,
TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u "
"ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u",
show_dev(__entry),
show_dev(__entry->dev),
show_data_type(__entry->type),
show_gc_type(__entry->gc_type),
show_alloc_mode(__entry->alloc_mode),
@@ -717,7 +841,7 @@ TRACE_EVENT(f2fs_reserve_new_blocks,
),
TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu",
show_dev(__entry),
show_dev(__entry->dev),
(unsigned int)__entry->nid,
__entry->ofs_in_node,
(unsigned long long)__entry->count)
@@ -737,6 +861,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
__field(block_t, new_blkaddr)
__field(int, op)
__field(int, op_flags)
__field(int, temp)
__field(int, type)
),
@@ -748,16 +873,18 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
__entry->new_blkaddr = fio->new_blkaddr;
__entry->op = fio->op;
__entry->op_flags = fio->op_flags;
__entry->temp = fio->temp;
__entry->type = fio->type;
),
TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
"oldaddr = 0x%llx, newaddr = 0x%llx rw = %s%s, type = %s",
"oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s",
show_dev_ino(__entry),
(unsigned long)__entry->index,
(unsigned long long)__entry->old_blkaddr,
(unsigned long long)__entry->new_blkaddr,
show_bio_type(__entry->op, __entry->op_flags),
show_block_temp(__entry->temp),
show_block_type(__entry->type))
);
@@ -770,7 +897,7 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio,
TP_CONDITION(page->mapping)
);
DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio,
DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_write,
TP_PROTO(struct page *page, struct f2fs_io_info *fio),
@@ -787,6 +914,7 @@ DECLARE_EVENT_CLASS(f2fs__bio,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(dev_t, target)
__field(int, op)
__field(int, op_flags)
__field(int, type)
@@ -796,6 +924,7 @@ DECLARE_EVENT_CLASS(f2fs__bio,
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->target = bio->bi_bdev->bd_dev;
__entry->op = bio_op(bio);
__entry->op_flags = bio->bi_rw;
__entry->type = type;
@@ -803,8 +932,9 @@ DECLARE_EVENT_CLASS(f2fs__bio,
__entry->size = bio->bi_iter.bi_size;
),
TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u",
show_dev(__entry),
TP_printk("dev = (%d,%d)/(%d,%d), rw = %s%s, %s, sector = %lld, size = %u",
show_dev(__entry->target),
show_dev(__entry->dev),
show_bio_type(__entry->op, __entry->op_flags),
show_block_type(__entry->type),
(unsigned long long)__entry->sector,
@@ -1101,16 +1231,16 @@ TRACE_EVENT(f2fs_write_checkpoint,
),
TP_printk("dev = (%d,%d), checkpoint for %s, state = %s",
show_dev(__entry),
show_dev(__entry->dev),
show_cpreason(__entry->reason),
__entry->msg)
);
TRACE_EVENT(f2fs_issue_discard,
DECLARE_EVENT_CLASS(f2fs_discard,
TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen),
TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
TP_ARGS(sb, blkstart, blklen),
TP_ARGS(dev, blkstart, blklen),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -1119,22 +1249,36 @@ TRACE_EVENT(f2fs_issue_discard,
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dev = dev->bd_dev;
__entry->blkstart = blkstart;
__entry->blklen = blklen;
),
TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx",
show_dev(__entry),
show_dev(__entry->dev),
(unsigned long long)__entry->blkstart,
(unsigned long long)__entry->blklen)
);
DEFINE_EVENT(f2fs_discard, f2fs_queue_discard,
TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
TP_ARGS(dev, blkstart, blklen)
);
DEFINE_EVENT(f2fs_discard, f2fs_issue_discard,
TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
TP_ARGS(dev, blkstart, blklen)
);
TRACE_EVENT(f2fs_issue_reset_zone,
TP_PROTO(struct super_block *sb, block_t blkstart),
TP_PROTO(struct block_device *dev, block_t blkstart),
TP_ARGS(sb, blkstart),
TP_ARGS(dev, blkstart),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -1142,38 +1286,41 @@ TRACE_EVENT(f2fs_issue_reset_zone,
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dev = dev->bd_dev;
__entry->blkstart = blkstart;
),
TP_printk("dev = (%d,%d), reset zone at block = 0x%llx",
show_dev(__entry),
show_dev(__entry->dev),
(unsigned long long)__entry->blkstart)
);
TRACE_EVENT(f2fs_issue_flush,
TP_PROTO(struct super_block *sb, unsigned int nobarrier,
unsigned int flush_merge),
TP_PROTO(struct block_device *dev, unsigned int nobarrier,
unsigned int flush_merge, int ret),
TP_ARGS(sb, nobarrier, flush_merge),
TP_ARGS(dev, nobarrier, flush_merge, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned int, nobarrier)
__field(unsigned int, flush_merge)
__field(int, ret)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dev = dev->bd_dev;
__entry->nobarrier = nobarrier;
__entry->flush_merge = flush_merge;
__entry->ret = ret;
),
TP_printk("dev = (%d,%d), %s %s",
show_dev(__entry),
TP_printk("dev = (%d,%d), %s %s, ret = %d",
show_dev(__entry->dev),
__entry->nobarrier ? "skip (nobarrier)" : "issue",
__entry->flush_merge ? " with flush_merge" : "")
__entry->flush_merge ? " with flush_merge" : "",
__entry->ret)
);
TRACE_EVENT(f2fs_lookup_extent_tree_start,
@@ -1286,7 +1433,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree,
),
TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u",
show_dev(__entry),
show_dev(__entry->dev),
__entry->node_cnt,
__entry->tree_cnt)
);
@@ -1333,7 +1480,7 @@ DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
),
TP_printk("dev = (%d,%d), %s, dirty count = %lld",
show_dev(__entry),
show_dev(__entry->dev),
show_file_type(__entry->type),
__entry->count)
);

View File

@@ -178,6 +178,23 @@ struct inodes_stat_t {
/* Policy provided via an ioctl on the topmost directory */
#define FS_KEY_DESCRIPTOR_SIZE 8
#define FS_POLICY_FLAGS_PAD_4 0x00
#define FS_POLICY_FLAGS_PAD_8 0x01
#define FS_POLICY_FLAGS_PAD_16 0x02
#define FS_POLICY_FLAGS_PAD_32 0x03
#define FS_POLICY_FLAGS_PAD_MASK 0x03
#define FS_POLICY_FLAGS_VALID 0x03
/* Encryption algorithms */
#define FS_ENCRYPTION_MODE_INVALID 0
#define FS_ENCRYPTION_MODE_AES_256_XTS 1
#define FS_ENCRYPTION_MODE_AES_256_GCM 2
#define FS_ENCRYPTION_MODE_AES_256_CBC 3
#define FS_ENCRYPTION_MODE_AES_256_CTS 4
#define FS_ENCRYPTION_MODE_AES_128_CBC 5
#define FS_ENCRYPTION_MODE_AES_128_CTS 6
struct fscrypt_policy {
__u8 version;
__u8 contents_encryption_mode;
@@ -190,6 +207,19 @@ struct fscrypt_policy {
#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy)
/* Parameters for passing an encryption key into the kernel keyring */
#define FS_KEY_DESC_PREFIX "fscrypt:"
#define FS_KEY_DESC_PREFIX_SIZE 8
/* Structure that userspace passes to the kernel keyring */
#define FS_MAX_KEY_SIZE 64
struct fscrypt_key {
__u32 mode;
__u8 raw[FS_MAX_KEY_SIZE];
__u32 size;
};
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
*/

View File

@@ -348,6 +348,7 @@ struct address_space *page_mapping(struct page *page)
return NULL;
return page->mapping;
}
EXPORT_SYMBOL(page_mapping);
int overcommit_ratio_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,