Merge 037c50bfbe ("Merge tag 'for-5.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux") into android-mainline

Steps on the way to 5.16-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ide84fd0886d433adb3c6d279c1ddaf4816055a3c
This commit is contained in:
Greg Kroah-Hartman
2021-11-03 11:49:14 +01:00
53 changed files with 4492 additions and 2946 deletions

View File

@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/list_sort.h>
#include "misc.h"
#include "ctree.h"
#include "block-group.h"
@@ -144,6 +145,7 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
*/
WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
kfree(cache->free_space_ctl);
kfree(cache->physical_map);
kfree(cache);
}
}
@@ -902,6 +904,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cluster->refill_lock);
btrfs_clear_treelog_bg(block_group);
btrfs_clear_data_reloc_bg(block_group);
path = btrfs_alloc_path();
if (!path) {
@@ -1484,6 +1487,21 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
spin_unlock(&fs_info->unused_bgs_lock);
}
/*
* We want block groups with a low number of used bytes to be in the beginning
* of the list, so they will get reclaimed first.
*/
static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
const struct list_head *b)
{
const struct btrfs_block_group *bg1, *bg2;
bg1 = list_entry(a, struct btrfs_block_group, bg_list);
bg2 = list_entry(b, struct btrfs_block_group, bg_list);
return bg1->used > bg2->used;
}
void btrfs_reclaim_bgs_work(struct work_struct *work)
{
struct btrfs_fs_info *fs_info =
@@ -1508,6 +1526,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
}
spin_lock(&fs_info->unused_bgs_lock);
/*
* Sort happens under lock because we can't simply splice it and sort.
* The block groups might still be in use and reachable via bg_list,
* and their presence in the reclaim_bgs list must be preserved.
*/
list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
while (!list_empty(&fs_info->reclaim_bgs)) {
u64 zone_unusable;
int ret = 0;
@@ -1895,6 +1919,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->discard_list);
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list);
INIT_LIST_HEAD(&cache->active_bg_list);
btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
@@ -2035,6 +2060,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
*/
if (btrfs_is_zoned(info)) {
btrfs_calc_zone_unusable(cache);
/* Should not have any excluded extents. Just in case, though. */
btrfs_free_excluded_extents(cache);
} else if (cache->length == cache->used) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
@@ -2062,15 +2089,18 @@ static int read_one_block_group(struct btrfs_fs_info *info,
link_block_group(cache);
set_avail_alloc_bits(info, cache->flags);
if (btrfs_chunk_readonly(info, cache->start)) {
if (btrfs_chunk_writeable(info, cache->start)) {
if (cache->used == 0) {
ASSERT(list_empty(&cache->bg_list));
if (btrfs_test_opt(info, DISCARD_ASYNC))
btrfs_discard_queue_work(&info->discard_ctl, cache);
else
btrfs_mark_bg_unused(cache);
}
} else {
inc_block_group_ro(cache, 1);
} else if (cache->used == 0) {
ASSERT(list_empty(&cache->bg_list));
if (btrfs_test_opt(info, DISCARD_ASYNC))
btrfs_discard_queue_work(&info->discard_ctl, cache);
else
btrfs_mark_bg_unused(cache);
}
return 0;
error:
btrfs_put_block_group(cache);
@@ -2438,6 +2468,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
return ERR_PTR(ret);
}
/*
* New block group is likely to be used soon. Try to activate it now.
* Failure is OK for now.
*/
btrfs_zone_activate(cache);
ret = exclude_super_stripes(cache);
if (ret) {
/* We may have excluded something, so call this just in case */
@@ -2479,7 +2515,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
*/
trace_btrfs_add_block_group(fs_info, cache, 1);
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
cache->bytes_super, 0, &cache->space_info);
cache->bytes_super, cache->zone_unusable,
&cache->space_info);
btrfs_update_global_block_rsv(fs_info);
link_block_group(cache);
@@ -2594,7 +2631,9 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
if (!--cache->ro) {
if (btrfs_is_zoned(cache->fs_info)) {
/* Migrate zone_unusable bytes back */
cache->zone_unusable = cache->alloc_offset - cache->used;
cache->zone_unusable =
(cache->alloc_offset - cache->used) +
(cache->length - cache->zone_capacity);
sinfo->bytes_zone_unusable += cache->zone_unusable;
sinfo->bytes_readonly -= cache->zone_unusable;
}
@@ -3143,7 +3182,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
}
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc)
u64 bytenr, u64 num_bytes, bool alloc)
{
struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_block_group *cache = NULL;
@@ -3380,36 +3419,17 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
*/
check_system_chunk(trans, flags);
bg = btrfs_alloc_chunk(trans, flags);
bg = btrfs_create_chunk(trans, flags);
if (IS_ERR(bg)) {
ret = PTR_ERR(bg);
goto out;
}
/*
* If this is a system chunk allocation then stop right here and do not
* add the chunk item to the chunk btree. This is to prevent a deadlock
* because this system chunk allocation can be triggered while COWing
* some extent buffer of the chunk btree and while holding a lock on a
* parent extent buffer, in which case attempting to insert the chunk
* item (or update the device item) would result in a deadlock on that
* parent extent buffer. In this case defer the chunk btree updates to
* the second phase of chunk allocation and keep our reservation until
* the second phase completes.
*
* This is a rare case and can only be triggered by the very few cases
* we have where we need to touch the chunk btree outside chunk allocation
* and chunk removal. These cases are basically adding a device, removing
* a device or resizing a device.
*/
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
return 0;
ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
/*
* Normally we are not expected to fail with -ENOSPC here, since we have
* previously reserved space in the system space_info and allocated one
* new system chunk if necessary. However there are two exceptions:
* new system chunk if necessary. However there are three exceptions:
*
* 1) We may have enough free space in the system space_info but all the
* existing system block groups have a profile which can not be used
@@ -3435,13 +3455,20 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
* with enough free space got turned into RO mode by a running scrub,
* and in this case we have to allocate a new one and retry. We only
* need do this allocate and retry once, since we have a transaction
* handle and scrub uses the commit root to search for block groups.
* handle and scrub uses the commit root to search for block groups;
*
* 3) We had one system block group with enough free space when we called
* check_system_chunk(), but after that, right before we tried to
* allocate the last extent buffer we needed, a discard operation came
* in and it temporarily removed the last free space entry from the
* block group (discard removes a free space entry, discards it, and
* then adds back the entry to the block group cache).
*/
if (ret == -ENOSPC) {
const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info);
struct btrfs_block_group *sys_bg;
sys_bg = btrfs_alloc_chunk(trans, sys_flags);
sys_bg = btrfs_create_chunk(trans, sys_flags);
if (IS_ERR(sys_bg)) {
ret = PTR_ERR(sys_bg);
btrfs_abort_transaction(trans, ret);
@@ -3519,7 +3546,15 @@ out:
* properly, either intentionally or as a bug. One example where this is
* done intentionally is fsync, as it does not reserve any transaction units
* and ends up allocating a variable number of metadata extents for log
* tree extent buffers.
* tree extent buffers;
*
* 4) The task has reserved enough transaction units / metadata space, but right
* before it tries to allocate the last extent buffer it needs, a discard
* operation comes in and, temporarily, removes the last free space entry from
* the only metadata block group that had free space (discard starts by
* removing a free space entry from a block group, then does the discard
* operation and, once it's done, it adds back the free space entry to the
* block group).
*
* We also need this 2 phases setup when adding a device to a filesystem with
* a seed device - we must create new metadata and system chunks without adding
@@ -3537,14 +3572,14 @@ out:
* This has happened before and commit eafa4fd0ad0607 ("btrfs: fix exhaustion of
* the system chunk array due to concurrent allocations") provides more details.
*
* For allocation of system chunks, we defer the updates and insertions into the
* chunk btree to phase 2. This is to prevent deadlocks on extent buffers because
* if the chunk allocation is triggered while COWing an extent buffer of the
* chunk btree, we are holding a lock on the parent of that extent buffer and
* doing the chunk btree updates and insertions can require locking that parent.
* This is for the very few and rare cases where we update the chunk btree that
* are not chunk allocation or chunk removal: adding a device, removing a device
* or resizing a device.
* Allocation of system chunks does not happen through this function. A task that
* needs to update the chunk btree (the only btree that uses system chunks), must
* preallocate chunk space by calling either check_system_chunk() or
* btrfs_reserve_chunk_metadata() - the former is used when allocating a data or
* metadata chunk or when removing a chunk, while the later is used before doing
* a modification to the chunk btree - use cases for the later are adding,
* removing and resizing a device as well as relocation of a system chunk.
* See the comment below for more details.
*
* The reservation of system space, done through check_system_chunk(), as well
* as all the updates and insertions into the chunk btree must be done while
@@ -3581,11 +3616,27 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
if (trans->allocating_chunk)
return -ENOSPC;
/*
* If we are removing a chunk, don't re-enter or we would deadlock.
* System space reservation and system chunk allocation is done by the
* chunk remove operation (btrfs_remove_chunk()).
* Allocation of system chunks can not happen through this path, as we
* could end up in a deadlock if we are allocating a data or metadata
* chunk and there is another task modifying the chunk btree.
*
* This is because while we are holding the chunk mutex, we will attempt
* to add the new chunk item to the chunk btree or update an existing
* device item in the chunk btree, while the other task that is modifying
* the chunk btree is attempting to COW an extent buffer while holding a
* lock on it and on its parent - if the COW operation triggers a system
* chunk allocation, then we can deadlock because we are holding the
* chunk mutex and we may need to access that extent buffer or its parent
* in order to add the chunk item or update a device item.
*
* Tasks that want to modify the chunk tree should reserve system space
* before updating the chunk btree, by calling either
* btrfs_reserve_chunk_metadata() or check_system_chunk().
* It's possible that after a task reserves the space, it still ends up
* here - this happens in the cases described above at do_chunk_alloc().
* The task will have to either retry or fail.
*/
if (trans->removing_chunk)
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
return -ENOSPC;
space_info = btrfs_find_space_info(fs_info, flags);
@@ -3684,17 +3735,14 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
return num_dev;
}
/*
* Reserve space in the system space for allocating or removing a chunk
*/
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
static void reserve_chunk_space(struct btrfs_trans_handle *trans,
u64 bytes,
u64 type)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *info;
u64 left;
u64 thresh;
int ret = 0;
u64 num_devs;
/*
* Needed because we can end up allocating a system chunk and for an
@@ -3707,19 +3755,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
left = info->total_bytes - btrfs_space_info_used(info, true);
spin_unlock(&info->lock);
num_devs = get_profile_num_devs(fs_info, type);
/* num_devs device items to update and 1 chunk item to add or remove */
thresh = btrfs_calc_metadata_size(fs_info, num_devs) +
btrfs_calc_insert_metadata_size(fs_info, 1);
if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
left, thresh, type);
left, bytes, type);
btrfs_dump_space_info(fs_info, info, 0, 0);
}
if (left < thresh) {
if (left < bytes) {
u64 flags = btrfs_system_alloc_profile(fs_info);
struct btrfs_block_group *bg;
@@ -3728,21 +3770,20 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
* needing it, as we might not need to COW all nodes/leafs from
* the paths we visit in the chunk tree (they were already COWed
* or created in the current transaction for example).
*
* Also, if our caller is allocating a system chunk, do not
* attempt to insert the chunk item in the chunk btree, as we
* could deadlock on an extent buffer since our caller may be
* COWing an extent buffer from the chunk btree.
*/
bg = btrfs_alloc_chunk(trans, flags);
bg = btrfs_create_chunk(trans, flags);
if (IS_ERR(bg)) {
ret = PTR_ERR(bg);
} else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
} else {
/*
* If we fail to add the chunk item here, we end up
* trying again at phase 2 of chunk allocation, at
* btrfs_create_pending_block_groups(). So ignore
* any error here.
* any error here. An ENOSPC here could happen, due to
* the cases described at do_chunk_alloc() - the system
* block group we just created was just turned into RO
* mode by a scrub for example, or a running discard
* temporarily removed its free space entries, etc.
*/
btrfs_chunk_alloc_add_chunk_item(trans, bg);
}
@@ -3751,12 +3792,61 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
if (!ret) {
ret = btrfs_block_rsv_add(fs_info->chunk_root,
&fs_info->chunk_block_rsv,
thresh, BTRFS_RESERVE_NO_FLUSH);
bytes, BTRFS_RESERVE_NO_FLUSH);
if (!ret)
trans->chunk_bytes_reserved += thresh;
trans->chunk_bytes_reserved += bytes;
}
}
/*
* Reserve space in the system space for allocating or removing a chunk.
* The caller must be holding fs_info->chunk_mutex.
*/
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
const u64 num_devs = get_profile_num_devs(fs_info, type);
u64 bytes;
/* num_devs device items to update and 1 chunk item to add or remove. */
bytes = btrfs_calc_metadata_size(fs_info, num_devs) +
btrfs_calc_insert_metadata_size(fs_info, 1);
reserve_chunk_space(trans, bytes, type);
}
/*
* Reserve space in the system space, if needed, for doing a modification to the
* chunk btree.
*
* @trans: A transaction handle.
* @is_item_insertion: Indicate if the modification is for inserting a new item
* in the chunk btree or if it's for the deletion or update
* of an existing item.
*
* This is used in a context where we need to update the chunk btree outside
* block group allocation and removal, to avoid a deadlock with a concurrent
* task that is allocating a metadata or data block group and therefore needs to
* update the chunk btree while holding the chunk mutex. After the update to the
* chunk btree is done, btrfs_trans_release_chunk_metadata() should be called.
*
*/
void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
bool is_item_insertion)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
u64 bytes;
if (is_item_insertion)
bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
else
bytes = btrfs_calc_metadata_size(fs_info, 1);
mutex_lock(&fs_info->chunk_mutex);
reserve_chunk_space(trans, bytes, BTRFS_BLOCK_GROUP_SYSTEM);
mutex_unlock(&fs_info->chunk_mutex);
}
void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
{
struct btrfs_block_group *block_group;
@@ -3833,6 +3923,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
}
spin_unlock(&info->unused_bgs_lock);
spin_lock(&info->zone_active_bgs_lock);
while (!list_empty(&info->zone_active_bgs)) {
block_group = list_first_entry(&info->zone_active_bgs,
struct btrfs_block_group,
active_bg_list);
list_del_init(&block_group->active_bg_list);
btrfs_put_block_group(block_group);
}
spin_unlock(&info->zone_active_bgs_lock);
spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
block_group = rb_entry(n, struct btrfs_block_group,

View File

@@ -98,6 +98,7 @@ struct btrfs_block_group {
unsigned int to_copy:1;
unsigned int relocating_repair:1;
unsigned int chunk_item_inserted:1;
unsigned int zone_is_active:1;
int disk_cache_state;
@@ -202,7 +203,10 @@ struct btrfs_block_group {
*/
u64 alloc_offset;
u64 zone_unusable;
u64 zone_capacity;
u64 meta_write_pointer;
struct map_lookup *physical_map;
struct list_head active_bg_list;
};
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
@@ -280,7 +284,7 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc);
u64 bytenr, u64 num_bytes, bool alloc);
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
u64 ram_bytes, u64 num_bytes, int delalloc);
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
@@ -289,6 +293,8 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
enum btrfs_chunk_alloc_enum force);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
bool is_item_insertion);
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
int btrfs_free_block_groups(struct btrfs_fs_info *info);

View File

@@ -138,17 +138,34 @@ struct btrfs_inode {
/* a local copy of root's last_log_commit */
int last_log_commit;
/* total number of bytes pending delalloc, used by stat to calc the
* real block usage of the file
*/
u64 delalloc_bytes;
union {
/*
* Total number of bytes pending delalloc, used by stat to
* calculate the real block usage of the file. This is used
* only for files.
*/
u64 delalloc_bytes;
/*
* The offset of the last dir item key that was logged.
* This is used only for directories.
*/
u64 last_dir_item_offset;
};
/*
* Total number of bytes pending delalloc that fall within a file
* range that is either a hole or beyond EOF (and no prealloc extent
* exists in the range). This is always <= delalloc_bytes.
*/
u64 new_delalloc_bytes;
union {
/*
* Total number of bytes pending delalloc that fall within a file
* range that is either a hole or beyond EOF (and no prealloc extent
* exists in the range). This is always <= delalloc_bytes and this
* is used only for files.
*/
u64 new_delalloc_bytes;
/*
* The offset of the last dir index key that was logged.
* This is used only for directories.
*/
u64 last_dir_index_offset;
};
/*
* total number of bytes pending defrag, used by stat to check whether
@@ -339,7 +356,12 @@ static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
struct btrfs_dio_private {
struct inode *inode;
u64 logical_offset;
/*
* Since DIO can use anonymous page, we cannot use page_offset() to
* grab the file offset, thus need a dedicated member for file offset.
*/
u64 file_offset;
u64 disk_bytenr;
/* Used for bio::bi_size */
u32 bytes;

View File

@@ -186,7 +186,6 @@ struct btrfsic_dev_state {
struct list_head collision_resolving_node; /* list node */
struct btrfsic_block dummy_block_for_bio_bh_flush;
u64 last_flush_gen;
char name[BDEVNAME_SIZE];
};
struct btrfsic_block_hashtable {
@@ -403,7 +402,6 @@ static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
ds->bdev = NULL;
ds->state = NULL;
ds->name[0] = '\0';
INIT_LIST_HEAD(&ds->collision_resolving_node);
ds->last_flush_gen = 0;
btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
@@ -756,10 +754,10 @@ static int btrfsic_process_superblock_dev_mirror(
superblock_tmp->mirror_num = 1 + superblock_mirror_num;
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
btrfs_info_in_rcu(fs_info,
"new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
"new initial S-block (bdev %p, %s) @%llu (%pg/%llu/%d)",
superblock_bdev,
rcu_str_deref(device->name), dev_bytenr,
dev_state->name, dev_bytenr,
dev_state->bdev, dev_bytenr,
superblock_mirror_num);
list_add(&superblock_tmp->all_blocks_node,
&state->all_blocks_list);
@@ -938,9 +936,10 @@ continue_with_current_leaf_stack_frame:
if (disk_item_offset + sizeof(struct btrfs_item) >
sf->block_ctx->len) {
leaf_item_out_of_bounce_error:
pr_info("btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
pr_info(
"btrfsic: leaf item out of bounce at logical %llu, dev %pg\n",
sf->block_ctx->start,
sf->block_ctx->dev->name);
sf->block_ctx->dev->bdev);
goto one_stack_frame_backwards;
}
btrfsic_read_from_block_data(sf->block_ctx,
@@ -1058,9 +1057,10 @@ continue_with_current_node_stack_frame:
(uintptr_t)nodehdr;
if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
sf->block_ctx->len) {
pr_info("btrfsic: node item out of bounce at logical %llu, dev %s\n",
pr_info(
"btrfsic: node item out of bounce at logical %llu, dev %pg\n",
sf->block_ctx->start,
sf->block_ctx->dev->name);
sf->block_ctx->dev->bdev);
goto one_stack_frame_backwards;
}
btrfsic_read_from_block_data(
@@ -1228,15 +1228,17 @@ static int btrfsic_create_link_to_next_block(
if (next_block->logical_bytenr != next_bytenr &&
!(!next_block->is_metadata &&
0 == next_block->logical_bytenr))
pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
next_bytenr, next_block_ctx->dev->name,
pr_info(
"referenced block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
next_bytenr, next_block_ctx->dev->bdev,
next_block_ctx->dev_bytenr, *mirror_nump,
btrfsic_get_block_type(state,
next_block),
next_block->logical_bytenr);
else
pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
next_bytenr, next_block_ctx->dev->name,
pr_info(
"referenced block @%llu (%pg/%llu/%d) found in hash table, %c\n",
next_bytenr, next_block_ctx->dev->bdev,
next_block_ctx->dev_bytenr, *mirror_nump,
btrfsic_get_block_type(state,
next_block));
@@ -1324,8 +1326,8 @@ static int btrfsic_handle_extent_data(
if (file_extent_item_offset +
offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
block_ctx->len) {
pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n",
block_ctx->start, block_ctx->dev->name);
pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
block_ctx->start, block_ctx->dev->bdev);
return -1;
}
@@ -1344,8 +1346,8 @@ static int btrfsic_handle_extent_data(
if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
block_ctx->len) {
pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n",
block_ctx->start, block_ctx->dev->name);
pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
block_ctx->start, block_ctx->dev->bdev);
return -1;
}
btrfsic_read_from_block_data(block_ctx, &file_extent_item,
@@ -1421,9 +1423,10 @@ static int btrfsic_handle_extent_data(
next_block->logical_bytenr != next_bytenr &&
!(!next_block->is_metadata &&
0 == next_block->logical_bytenr)) {
pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu).\n",
pr_info(
"referenced block @%llu (%pg/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu)\n",
next_bytenr,
next_block_ctx.dev->name,
next_block_ctx.dev->bdev,
next_block_ctx.dev_bytenr,
mirror_num,
next_block->logical_bytenr);
@@ -1455,7 +1458,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
struct btrfs_fs_info *fs_info = state->fs_info;
int ret;
u64 length;
struct btrfs_bio *multi = NULL;
struct btrfs_io_context *multi = NULL;
struct btrfs_device *device;
length = len;
@@ -1561,7 +1564,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
struct bio *bio;
unsigned int j;
bio = btrfs_io_bio_alloc(num_pages - i);
bio = btrfs_bio_alloc(num_pages - i);
bio_set_dev(bio, block_ctx->dev->bdev);
bio->bi_iter.bi_sector = dev_bytenr >> 9;
bio->bi_opf = REQ_OP_READ;
@@ -1577,8 +1580,8 @@ static int btrfsic_read_block(struct btrfsic_state *state,
return -1;
}
if (submit_bio_wait(bio)) {
pr_info("btrfsic: read error at logical %llu dev %s!\n",
block_ctx->start, block_ctx->dev->name);
pr_info("btrfsic: read error at logical %llu dev %pg!\n",
block_ctx->start, block_ctx->dev->bdev);
bio_put(bio);
return -1;
}
@@ -1602,33 +1605,35 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
const struct btrfsic_block_link *l;
pr_info("%c-block @%llu (%s/%llu/%d)\n",
pr_info("%c-block @%llu (%pg/%llu/%d)\n",
btrfsic_get_block_type(state, b_all),
b_all->logical_bytenr, b_all->dev_state->name,
b_all->logical_bytenr, b_all->dev_state->bdev,
b_all->dev_bytenr, b_all->mirror_num);
list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
pr_info(" %c @%llu (%s/%llu/%d) refers %u* to %c @%llu (%s/%llu/%d)\n",
pr_info(
" %c @%llu (%pg/%llu/%d) refers %u* to %c @%llu (%pg/%llu/%d)\n",
btrfsic_get_block_type(state, b_all),
b_all->logical_bytenr, b_all->dev_state->name,
b_all->logical_bytenr, b_all->dev_state->bdev,
b_all->dev_bytenr, b_all->mirror_num,
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_to),
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
l->block_ref_to->dev_state->bdev,
l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
}
list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
pr_info(" %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
pr_info(
" %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
btrfsic_get_block_type(state, b_all),
b_all->logical_bytenr, b_all->dev_state->name,
b_all->logical_bytenr, b_all->dev_state->bdev,
b_all->dev_bytenr, b_all->mirror_num,
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_from),
l->block_ref_from->logical_bytenr,
l->block_ref_from->dev_state->name,
l->block_ref_from->dev_state->bdev,
l->block_ref_from->dev_bytenr,
l->block_ref_from->mirror_num);
}
@@ -1743,16 +1748,18 @@ again:
if (block->logical_bytenr != bytenr &&
!(!block->is_metadata &&
block->logical_bytenr == 0))
pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
bytenr, dev_state->name,
pr_info(
"written block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
bytenr, dev_state->bdev,
dev_bytenr,
block->mirror_num,
btrfsic_get_block_type(state,
block),
block->logical_bytenr);
else
pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
bytenr, dev_state->name,
pr_info(
"written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
bytenr, dev_state->bdev,
dev_bytenr, block->mirror_num,
btrfsic_get_block_type(state,
block));
@@ -1767,8 +1774,9 @@ again:
processed_len = state->datablock_size;
bytenr = block->logical_bytenr;
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
bytenr, dev_state->name, dev_bytenr,
pr_info(
"written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
bytenr, dev_state->bdev, dev_bytenr,
block->mirror_num,
btrfsic_get_block_type(state, block));
}
@@ -1778,9 +1786,10 @@ again:
list_empty(&block->ref_to_list) ? ' ' : '!',
list_empty(&block->ref_from_list) ? ' ' : '!');
if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
pr_info(
"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
btrfsic_get_block_type(state, block), bytenr,
dev_state->name, dev_bytenr, block->mirror_num,
dev_state->bdev, dev_bytenr, block->mirror_num,
block->generation,
btrfs_disk_key_objectid(&block->disk_key),
block->disk_key.type,
@@ -1792,9 +1801,10 @@ again:
}
if (!block->is_iodone && !block->never_written) {
pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
pr_info(
"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
btrfsic_get_block_type(state, block), bytenr,
dev_state->name, dev_bytenr, block->mirror_num,
dev_state->bdev, dev_bytenr, block->mirror_num,
block->generation,
btrfs_stack_header_generation(
(struct btrfs_header *)
@@ -1921,8 +1931,9 @@ again:
if (!is_metadata) {
processed_len = state->datablock_size;
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
pr_info("Written block (%s/%llu/?) !found in hash table, D.\n",
dev_state->name, dev_bytenr);
pr_info(
"written block (%pg/%llu/?) !found in hash table, D\n",
dev_state->bdev, dev_bytenr);
if (!state->include_extent_data) {
/* ignore that written D block */
goto continue_loop;
@@ -1939,8 +1950,9 @@ again:
btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
dev_bytenr);
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
pr_info("Written block @%llu (%s/%llu/?) !found in hash table, M.\n",
bytenr, dev_state->name, dev_bytenr);
pr_info(
"written block @%llu (%pg/%llu/?) !found in hash table, M\n",
bytenr, dev_state->bdev, dev_bytenr);
}
block_ctx.dev = dev_state;
@@ -1995,9 +2007,9 @@ again:
block->next_in_same_bio = NULL;
}
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
pr_info("New written %c-block @%llu (%s/%llu/%d)\n",
pr_info("new written %c-block @%llu (%pg/%llu/%d)\n",
is_metadata ? 'M' : 'D',
block->logical_bytenr, block->dev_state->name,
block->logical_bytenr, block->dev_state->bdev,
block->dev_bytenr, block->mirror_num);
list_add(&block->all_blocks_node, &state->all_blocks_list);
btrfsic_block_hashtable_add(block, &state->block_hashtable);
@@ -2041,10 +2053,10 @@ static void btrfsic_bio_end_io(struct bio *bp)
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
pr_info("bio_end_io(err=%d) for %c @%llu (%pg/%llu/%d)\n",
bp->bi_status,
btrfsic_get_block_type(dev_state->state, block),
block->logical_bytenr, dev_state->name,
block->logical_bytenr, dev_state->bdev,
block->dev_bytenr, block->mirror_num);
next_block = block->next_in_same_bio;
block->iodone_w_error = iodone_w_error;
@@ -2052,8 +2064,8 @@ static void btrfsic_bio_end_io(struct bio *bp)
dev_state->last_flush_gen++;
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
pr_info("bio_end_io() new %s flush_gen=%llu\n",
dev_state->name,
pr_info("bio_end_io() new %pg flush_gen=%llu\n",
dev_state->bdev,
dev_state->last_flush_gen);
}
if (block->submit_bio_bh_rw & REQ_FUA)
@@ -2078,17 +2090,19 @@ static int btrfsic_process_written_superblock(
if (!(superblock->generation > state->max_superblock_generation ||
0 == state->max_superblock_generation)) {
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
pr_info("btrfsic: superblock @%llu (%s/%llu/%d) with old gen %llu <= %llu\n",
pr_info(
"btrfsic: superblock @%llu (%pg/%llu/%d) with old gen %llu <= %llu\n",
superblock->logical_bytenr,
superblock->dev_state->name,
superblock->dev_state->bdev,
superblock->dev_bytenr, superblock->mirror_num,
btrfs_super_generation(super_hdr),
state->max_superblock_generation);
} else {
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
pr_info("btrfsic: got new superblock @%llu (%s/%llu/%d) with new gen %llu > %llu\n",
pr_info(
"btrfsic: got new superblock @%llu (%pg/%llu/%d) with new gen %llu > %llu\n",
superblock->logical_bytenr,
superblock->dev_state->name,
superblock->dev_state->bdev,
superblock->dev_bytenr, superblock->mirror_num,
btrfs_super_generation(super_hdr),
state->max_superblock_generation);
@@ -2232,38 +2246,42 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
*/
list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
pr_info("rl=%d, %c @%llu (%s/%llu/%d) %u* refers to %c @%llu (%s/%llu/%d)\n",
pr_info(
"rl=%d, %c @%llu (%pg/%llu/%d) %u* refers to %c @%llu (%pg/%llu/%d)\n",
recursion_level,
btrfsic_get_block_type(state, block),
block->logical_bytenr, block->dev_state->name,
block->logical_bytenr, block->dev_state->bdev,
block->dev_bytenr, block->mirror_num,
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_to),
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
l->block_ref_to->dev_state->bdev,
l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
if (l->block_ref_to->never_written) {
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is never written!\n",
pr_info(
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is never written!\n",
btrfsic_get_block_type(state, l->block_ref_to),
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
l->block_ref_to->dev_state->bdev,
l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
ret = -1;
} else if (!l->block_ref_to->is_iodone) {
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not yet iodone!\n",
pr_info(
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not yet iodone!\n",
btrfsic_get_block_type(state, l->block_ref_to),
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
l->block_ref_to->dev_state->bdev,
l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
ret = -1;
} else if (l->block_ref_to->iodone_w_error) {
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which has write error!\n",
pr_info(
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which has write error!\n",
btrfsic_get_block_type(state, l->block_ref_to),
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
l->block_ref_to->dev_state->bdev,
l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
ret = -1;
@@ -2273,10 +2291,11 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
l->parent_generation &&
BTRFSIC_GENERATION_UNKNOWN !=
l->block_ref_to->generation) {
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) with generation %llu != parent generation %llu!\n",
pr_info(
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) with generation %llu != parent generation %llu!\n",
btrfsic_get_block_type(state, l->block_ref_to),
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
l->block_ref_to->dev_state->bdev,
l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num,
l->block_ref_to->generation,
@@ -2284,10 +2303,11 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
ret = -1;
} else if (l->block_ref_to->flush_gen >
l->block_ref_to->dev_state->last_flush_gen) {
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
pr_info(
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
btrfsic_get_block_type(state, l->block_ref_to),
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
l->block_ref_to->dev_state->bdev,
l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num, block->flush_gen,
l->block_ref_to->dev_state->last_flush_gen);
@@ -2324,15 +2344,16 @@ static int btrfsic_is_block_ref_by_superblock(
*/
list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
pr_info("rl=%d, %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
pr_info(
"rl=%d, %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
recursion_level,
btrfsic_get_block_type(state, block),
block->logical_bytenr, block->dev_state->name,
block->logical_bytenr, block->dev_state->bdev,
block->dev_bytenr, block->mirror_num,
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_from),
l->block_ref_from->logical_bytenr,
l->block_ref_from->dev_state->name,
l->block_ref_from->dev_state->bdev,
l->block_ref_from->dev_bytenr,
l->block_ref_from->mirror_num);
if (l->block_ref_from->is_superblock &&
@@ -2354,30 +2375,30 @@ static int btrfsic_is_block_ref_by_superblock(
static void btrfsic_print_add_link(const struct btrfsic_state *state,
const struct btrfsic_block_link *l)
{
pr_info("Add %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
pr_info("add %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_from),
l->block_ref_from->logical_bytenr,
l->block_ref_from->dev_state->name,
l->block_ref_from->dev_state->bdev,
l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
btrfsic_get_block_type(state, l->block_ref_to),
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
}
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
const struct btrfsic_block_link *l)
{
pr_info("Rem %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
pr_info("rem %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_from),
l->block_ref_from->logical_bytenr,
l->block_ref_from->dev_state->name,
l->block_ref_from->dev_state->bdev,
l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
btrfsic_get_block_type(state, l->block_ref_to),
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
}
@@ -2419,9 +2440,9 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
* This algorithm is recursive because the amount of used stack space
* is very small and the max recursion depth is limited.
*/
indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)",
indent_add = sprintf(buf, "%c-%llu(%pg/%llu/%u)",
btrfsic_get_block_type(state, block),
block->logical_bytenr, block->dev_state->name,
block->logical_bytenr, block->dev_state->bdev,
block->dev_bytenr, block->mirror_num);
if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
printk("[...]\n");
@@ -2542,10 +2563,10 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
block->never_written = never_written;
block->mirror_num = mirror_num;
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
pr_info("New %s%c-block @%llu (%s/%llu/%d)\n",
pr_info("New %s%c-block @%llu (%pg/%llu/%d)\n",
additional_string,
btrfsic_get_block_type(state, block),
block->logical_bytenr, dev_state->name,
block->logical_bytenr, dev_state->bdev,
block->dev_bytenr, mirror_num);
list_add(&block->all_blocks_node, &state->all_blocks_list);
btrfsic_block_hashtable_add(block, &state->block_hashtable);
@@ -2592,8 +2613,9 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
}
if (WARN_ON(!match)) {
pr_info("btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%s, phys_bytenr=%llu)!\n",
bytenr, dev_state->name, dev_bytenr);
pr_info(
"btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%pg, phys_bytenr=%llu)!\n",
bytenr, dev_state->bdev, dev_bytenr);
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
ret = btrfsic_map_block(state, bytenr,
state->metablock_size,
@@ -2601,8 +2623,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
if (ret)
continue;
pr_info("Read logical bytenr @%llu maps to (%s/%llu/%d)\n",
bytenr, block_ctx.dev->name,
pr_info("read logical bytenr @%llu maps to (%pg/%llu/%d)\n",
bytenr, block_ctx.dev->bdev,
block_ctx.dev_bytenr, mirror_num);
}
}
@@ -2675,8 +2697,9 @@ static void __btrfsic_submit_bio(struct bio *bio)
if ((dev_state->state->print_mask &
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
BTRFSIC_PRINT_MASK_VERBOSE)))
pr_info("btrfsic_submit_bio(%s) with FLUSH but dummy block already in use (ignored)!\n",
dev_state->name);
pr_info(
"btrfsic_submit_bio(%pg) with FLUSH but dummy block already in use (ignored)!\n",
dev_state->bdev);
} else {
struct btrfsic_block *const block =
&dev_state->dummy_block_for_bio_bh_flush;
@@ -2751,7 +2774,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
list_for_each_entry(device, dev_head, dev_list) {
struct btrfsic_dev_state *ds;
const char *p;
if (!device->bdev || !device->name)
continue;
@@ -2763,10 +2785,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
}
ds->bdev = device->bdev;
ds->state = state;
bdevname(ds->bdev, ds->name);
ds->name[BDEVNAME_SIZE - 1] = '\0';
p = kbasename(ds->name);
strlcpy(ds->name, p, sizeof(ds->name));
btrfsic_dev_state_hashtable_add(ds,
&btrfsic_dev_state_hashtable);
}
@@ -2844,9 +2862,10 @@ void btrfsic_unmount(struct btrfs_fs_devices *fs_devices)
if (b_all->is_iodone || b_all->never_written)
btrfsic_block_free(b_all);
else
pr_info("btrfs: attempt to free %c-block @%llu (%s/%llu/%d) on umount which is not yet iodone!\n",
pr_info(
"btrfs: attempt to free %c-block @%llu (%pg/%llu/%d) on umount which is not yet iodone!\n",
btrfsic_get_block_type(state, b_all),
b_all->logical_bytenr, b_all->dev_state->name,
b_all->logical_bytenr, b_all->dev_state->bdev,
b_all->dev_bytenr, b_all->mirror_num);
}

View File

@@ -29,6 +29,7 @@
#include "compression.h"
#include "extent_io.h"
#include "extent_map.h"
#include "subpage.h"
#include "zoned.h"
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
@@ -181,9 +182,9 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
if (memcmp(&csum, cb_sum, csum_size) != 0) {
btrfs_print_data_csum_error(inode, disk_start,
csum, cb_sum, cb->mirror_num);
if (btrfs_io_bio(bio)->device)
if (btrfs_bio(bio)->device)
btrfs_dev_stat_inc_and_print(
btrfs_io_bio(bio)->device,
btrfs_bio(bio)->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
return -EIO;
}
@@ -194,6 +195,87 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
return 0;
}
/*
* Reduce bio and io accounting for a compressed_bio with its corresponding bio.
*
* Return true if there is no pending bio nor io.
* Return false otherwise.
*/
static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
unsigned int bi_size = 0;
bool last_io = false;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
/*
* At endio time, bi_iter.bi_size doesn't represent the real bio size.
* Thus here we have to iterate through all segments to grab correct
* bio size.
*/
bio_for_each_segment_all(bvec, bio, iter_all)
bi_size += bvec->bv_len;
if (bio->bi_status)
cb->errors = 1;
ASSERT(bi_size && bi_size <= cb->compressed_len);
last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
&cb->pending_sectors);
/*
* Here we must wake up the possible error handler after all other
* operations on @cb finished, or we can race with
* finish_compressed_bio_*() which may free @cb.
*/
wake_up_var(cb);
return last_io;
}
static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
{
unsigned int index;
struct page *page;
/* Release the compressed pages */
for (index = 0; index < cb->nr_pages; index++) {
page = cb->compressed_pages[index];
page->mapping = NULL;
put_page(page);
}
/* Do io completion on the original bio */
if (cb->errors) {
bio_io_error(cb->orig_bio);
} else {
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
ASSERT(bio);
ASSERT(!bio->bi_status);
/*
* We have verified the checksum already, set page checked so
* the end_io handlers know about it
*/
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
u64 bvec_start = page_offset(bvec->bv_page) +
bvec->bv_offset;
btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
bvec->bv_page, bvec_start,
bvec->bv_len);
}
bio_endio(cb->orig_bio);
}
/* Finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
}
/* when we finish reading compressed pages from the disk, we
* decompress them and then run the bio end_io routines on the
* decompressed pages (in the inode address space).
@@ -208,25 +290,17 @@ static void end_compressed_bio_read(struct bio *bio)
{
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
struct page *page;
unsigned int index;
unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
unsigned int mirror = btrfs_bio(bio)->mirror_num;
int ret = 0;
if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
* extent, just exit
*/
if (!refcount_dec_and_test(&cb->pending_bios))
if (!dec_and_test_compressed_bio(cb, bio))
goto out;
/*
* Record the correct mirror_num in cb->orig_bio so that
* read-repair can work properly.
*/
btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
btrfs_bio(cb->orig_bio)->mirror_num = mirror;
cb->mirror_num = mirror;
/*
@@ -250,36 +324,7 @@ static void end_compressed_bio_read(struct bio *bio)
csum_failed:
if (ret)
cb->errors = 1;
/* release the compressed pages */
index = 0;
for (index = 0; index < cb->nr_pages; index++) {
page = cb->compressed_pages[index];
page->mapping = NULL;
put_page(page);
}
/* do io completion on the original bio */
if (cb->errors) {
bio_io_error(cb->orig_bio);
} else {
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
/*
* we have verified the checksum already, set page
* checked so the end_io handlers know about it
*/
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all)
SetPageChecked(bvec->bv_page);
bio_endio(cb->orig_bio);
}
/* finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
finish_compressed_bio_read(cb, bio);
out:
bio_put(bio);
}
@@ -291,6 +336,7 @@ out:
static noinline void end_compressed_writeback(struct inode *inode,
const struct compressed_bio *cb)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long index = cb->start >> PAGE_SHIFT;
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct page *pages[16];
@@ -313,7 +359,8 @@ static noinline void end_compressed_writeback(struct inode *inode,
for (i = 0; i < ret; i++) {
if (cb->errors)
SetPageError(pages[i]);
end_page_writeback(pages[i]);
btrfs_page_clamp_clear_writeback(fs_info, pages[i],
cb->start, cb->len);
put_page(pages[i]);
}
nr_pages -= ret;
@@ -322,60 +369,127 @@ static noinline void end_compressed_writeback(struct inode *inode,
/* the inode may be gone now */
}
/*
* do the cleanup once all the compressed pages hit the disk.
* This will clear writeback on the file pages and free the compressed
* pages.
*
* This also calls the writeback end hooks for the file pages so that
* metadata and checksums can be updated in the file.
*/
static void end_compressed_bio_write(struct bio *bio)
static void finish_compressed_bio_write(struct compressed_bio *cb)
{
struct compressed_bio *cb = bio->bi_private;
struct inode *inode;
struct page *page;
struct inode *inode = cb->inode;
unsigned int index;
if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
* extent, just exit
/*
* Ok, we're the last bio for this extent, step one is to call back
* into the FS and do all the end_io operations.
*/
if (!refcount_dec_and_test(&cb->pending_bios))
goto out;
/* ok, we're the last bio for this extent, step one is to
* call back into the FS and do all the end_io operations
*/
inode = cb->inode;
btrfs_record_physical_zoned(inode, cb->start, bio);
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
cb->start, cb->start + cb->len - 1,
!cb->errors);
end_compressed_writeback(inode, cb);
/* note, our inode could be gone now */
/* Note, our inode could be gone now */
/*
* release the compressed pages, these came from alloc_page and
* Release the compressed pages, these came from alloc_page and
* are not attached to the inode at all
*/
index = 0;
for (index = 0; index < cb->nr_pages; index++) {
page = cb->compressed_pages[index];
struct page *page = cb->compressed_pages[index];
page->mapping = NULL;
put_page(page);
}
/* finally free the cb struct */
/* Finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
}
/*
* Do the cleanup once all the compressed pages hit the disk. This will clear
* writeback on the file pages and free the compressed pages.
*
* This also calls the writeback end hooks for the file pages so that metadata
* and checksums can be updated in the file.
*/
static void end_compressed_bio_write(struct bio *bio)
{
struct compressed_bio *cb = bio->bi_private;
if (!dec_and_test_compressed_bio(cb, bio))
goto out;
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
finish_compressed_bio_write(cb);
out:
bio_put(bio);
}
static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
struct compressed_bio *cb,
struct bio *bio, int mirror_num)
{
blk_status_t ret;
ASSERT(bio->bi_iter.bi_size);
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
if (ret)
return ret;
ret = btrfs_map_bio(fs_info, bio, mirror_num);
return ret;
}
/*
* Allocate a compressed_bio, which will be used to read/write on-disk
* (aka, compressed) * data.
*
* @cb: The compressed_bio structure, which records all the needed
* information to bind the compressed data to the uncompressed
* page cache.
* @disk_byten: The logical bytenr where the compressed data will be read
* from or written to.
* @endio_func: The endio function to call after the IO for compressed data
* is finished.
* @next_stripe_start: Return value of logical bytenr of where next stripe starts.
* Let the caller know to only fill the bio up to the stripe
* boundary.
*/
static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
unsigned int opf, bio_end_io_t endio_func,
u64 *next_stripe_start)
{
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
struct btrfs_io_geometry geom;
struct extent_map *em;
struct bio *bio;
int ret;
bio = btrfs_bio_alloc(BIO_MAX_VECS);
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bio->bi_opf = opf;
bio->bi_private = cb;
bio->bi_end_io = endio_func;
em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
if (IS_ERR(em)) {
bio_put(bio);
return ERR_CAST(em);
}
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
bio_set_dev(bio, em->map_lookup->stripes[0].dev->bdev);
ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), disk_bytenr, &geom);
free_extent_map(em);
if (ret < 0) {
bio_put(bio);
return ERR_PTR(ret);
}
*next_stripe_start = disk_bytenr + geom.len;
return bio;
}
/*
* worker function to build and submit bios for previously compressed pages.
* The corresponding pages in the inode should be marked for writeback
@@ -396,20 +510,19 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct bio *bio = NULL;
struct compressed_bio *cb;
unsigned long bytes_left;
int pg_index = 0;
struct page *page;
u64 first_byte = disk_start;
u64 cur_disk_bytenr = disk_start;
u64 next_stripe_start;
blk_status_t ret;
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
const bool use_append = btrfs_use_zone_append(inode, disk_start);
const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
WARN_ON(!PAGE_ALIGNED(start));
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(len, fs_info->sectorsize));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
return BLK_STS_RESOURCE;
refcount_set(&cb->pending_bios, 0);
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
cb->errors = 0;
cb->inode = &inode->vfs_inode;
cb->start = start;
@@ -420,118 +533,100 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;
bio = btrfs_bio_alloc(first_byte);
bio->bi_opf = bio_op | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
while (cur_disk_bytenr < disk_start + compressed_len) {
u64 offset = cur_disk_bytenr - disk_start;
unsigned int index = offset >> PAGE_SHIFT;
unsigned int real_size;
unsigned int added;
struct page *page = compressed_pages[index];
bool submit = false;
if (use_append) {
struct btrfs_device *device;
device = btrfs_zoned_get_device(fs_info, disk_start, PAGE_SIZE);
if (IS_ERR(device)) {
kfree(cb);
bio_put(bio);
return BLK_STS_NOTSUPP;
/* Allocate new bio if submitted or not yet allocated */
if (!bio) {
bio = alloc_compressed_bio(cb, cur_disk_bytenr,
bio_op | write_flags, end_compressed_bio_write,
&next_stripe_start);
if (IS_ERR(bio)) {
ret = errno_to_blk_status(PTR_ERR(bio));
bio = NULL;
goto finish_cb;
}
}
bio_set_dev(bio, device->bdev);
}
if (blkcg_css) {
bio->bi_opf |= REQ_CGROUP_PUNT;
kthread_associate_blkcg(blkcg_css);
}
refcount_set(&cb->pending_bios, 1);
/* create and submit bios for the compressed pages */
bytes_left = compressed_len;
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
int submit = 0;
int len = 0;
page = compressed_pages[pg_index];
page->mapping = inode->vfs_inode.i_mapping;
if (bio->bi_iter.bi_size)
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
0);
/*
* We should never reach next_stripe_start start as we will
* submit comp_bio when reach the boundary immediately.
*/
ASSERT(cur_disk_bytenr != next_stripe_start);
/*
* Page can only be added to bio if the current bio fits in
* stripe.
* We have various limits on the real read size:
* - stripe boundary
* - page boundary
* - compressed length boundary
*/
if (!submit) {
if (pg_index == 0 && use_append)
len = bio_add_zone_append_page(bio, page,
PAGE_SIZE, 0);
else
len = bio_add_page(bio, page, PAGE_SIZE, 0);
}
real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_bytenr);
real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
real_size = min_t(u64, real_size, compressed_len - offset);
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
page->mapping = NULL;
if (submit || len < PAGE_SIZE) {
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
* we inc the count. Otherwise, the cb might get
* freed before we're done setting it up
*/
refcount_inc(&cb->pending_bios);
ret = btrfs_bio_wq_end_io(fs_info, bio,
BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
if (use_append)
added = bio_add_zone_append_page(bio, page, real_size,
offset_in_page(offset));
else
added = bio_add_page(bio, page, real_size,
offset_in_page(offset));
/* Reached zoned boundary */
if (added == 0)
submit = true;
cur_disk_bytenr += added;
/* Reached stripe boundary */
if (cur_disk_bytenr == next_stripe_start)
submit = true;
/* Finished the range */
if (cur_disk_bytenr == disk_start + compressed_len)
submit = true;
if (submit) {
if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, start, 1);
BUG_ON(ret); /* -ENOMEM */
if (ret)
goto finish_cb;
}
ret = btrfs_map_bio(fs_info, bio, 0);
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
}
bio = btrfs_bio_alloc(first_byte);
bio->bi_opf = bio_op | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
if (blkcg_css)
bio->bi_opf |= REQ_CGROUP_PUNT;
/*
* Use bio_add_page() to ensure the bio has at least one
* page.
*/
bio_add_page(bio, page, PAGE_SIZE, 0);
ret = submit_compressed_bio(fs_info, cb, bio, 0);
if (ret)
goto finish_cb;
bio = NULL;
}
if (bytes_left < PAGE_SIZE) {
btrfs_info(fs_info,
"bytes left %lu compress len %u nr %u",
bytes_left, cb->compressed_len, cb->nr_pages);
}
bytes_left -= PAGE_SIZE;
first_byte += PAGE_SIZE;
cond_resched();
}
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, start, 1);
BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(fs_info, bio, 0);
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
}
if (blkcg_css)
kthread_associate_blkcg(NULL);
return 0;
finish_cb:
if (bio) {
bio->bi_status = ret;
bio_endio(bio);
}
/* Last byte of @cb is submitted, endio will free @cb */
if (cur_disk_bytenr == disk_start + compressed_len)
return ret;
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
(disk_start + compressed_len - cur_disk_bytenr) >>
fs_info->sectorsize_bits);
/*
* Even with previous bio ended, we should still have io not yet
* submitted, thus need to finish manually.
*/
ASSERT(refcount_read(&cb->pending_sectors));
/* Now we are the only one referring @cb, can finish it safely. */
finish_compressed_bio_write(cb);
return ret;
}
static u64 bio_end_offset(struct bio *bio)
@@ -541,25 +636,33 @@ static u64 bio_end_offset(struct bio *bio)
return page_offset(last->bv_page) + last->bv_len + last->bv_offset;
}
/*
* Add extra pages in the same compressed file extent so that we don't need to
* re-read the same extent again and again.
*
* NOTE: this won't work well for subpage, as for subpage read, we lock the
* full page then submit bio for each compressed/regular extents.
*
* This means, if we have several sectors in the same page points to the same
* on-disk compressed data, we will re-read the same extent many times and
* this function can only help for the next page.
*/
static noinline int add_ra_bio_pages(struct inode *inode,
u64 compressed_end,
struct compressed_bio *cb)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long end_index;
unsigned long pg_index;
u64 last_offset;
u64 cur = bio_end_offset(cb->orig_bio);
u64 isize = i_size_read(inode);
int ret;
struct page *page;
unsigned long nr_pages = 0;
struct extent_map *em;
struct address_space *mapping = inode->i_mapping;
struct extent_map_tree *em_tree;
struct extent_io_tree *tree;
u64 end;
int misses = 0;
int sectors_missed = 0;
last_offset = bio_end_offset(cb->orig_bio);
em_tree = &BTRFS_I(inode)->extent_tree;
tree = &BTRFS_I(inode)->io_tree;
@@ -578,18 +681,29 @@ static noinline int add_ra_bio_pages(struct inode *inode,
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
while (last_offset < compressed_end) {
pg_index = last_offset >> PAGE_SHIFT;
while (cur < compressed_end) {
u64 page_end;
u64 pg_index = cur >> PAGE_SHIFT;
u32 add_size;
if (pg_index > end_index)
break;
page = xa_load(&mapping->i_pages, pg_index);
if (page && !xa_is_value(page)) {
misses++;
if (misses > 4)
sectors_missed += (PAGE_SIZE - offset_in_page(cur)) >>
fs_info->sectorsize_bits;
/* Beyond threshold, no need to continue */
if (sectors_missed > 4)
break;
goto next;
/*
* Jump to next page start as we already have page for
* current offset.
*/
cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
continue;
}
page = __page_cache_alloc(mapping_gfp_constraint(mapping,
@@ -599,14 +713,11 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
put_page(page);
goto next;
/* There is already a page, skip to page end */
cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
continue;
}
/*
* at this point, we have a locked page in the page cache
* for these bytes in the file. But, we have to make
* sure they map to this compressed extent on disk.
*/
ret = set_page_extent_mapped(page);
if (ret < 0) {
unlock_page(page);
@@ -614,18 +725,22 @@ static noinline int add_ra_bio_pages(struct inode *inode,
break;
}
end = last_offset + PAGE_SIZE - 1;
lock_extent(tree, last_offset, end);
page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1;
lock_extent(tree, cur, page_end);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, last_offset,
PAGE_SIZE);
em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
read_unlock(&em_tree->lock);
if (!em || last_offset < em->start ||
(last_offset + PAGE_SIZE > extent_map_end(em)) ||
/*
* At this point, we have a locked page in the page cache for
* these bytes in the file. But, we have to make sure they map
* to this compressed extent on disk.
*/
if (!em || cur < em->start ||
(cur + fs_info->sectorsize > extent_map_end(em)) ||
(em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
free_extent_map(em);
unlock_extent(tree, last_offset, end);
unlock_extent(tree, cur, page_end);
unlock_page(page);
put_page(page);
break;
@@ -643,20 +758,23 @@ static noinline int add_ra_bio_pages(struct inode *inode,
}
}
ret = bio_add_page(cb->orig_bio, page,
PAGE_SIZE, 0);
if (ret == PAGE_SIZE) {
nr_pages++;
put_page(page);
} else {
unlock_extent(tree, last_offset, end);
add_size = min(em->start + em->len, page_end + 1) - cur;
ret = bio_add_page(cb->orig_bio, page, add_size, offset_in_page(cur));
if (ret != add_size) {
unlock_extent(tree, cur, page_end);
unlock_page(page);
put_page(page);
break;
}
next:
last_offset += PAGE_SIZE;
/*
* If it's subpage, we also need to increase its
* subpage::readers number, as at endio we will decrease
* subpage::readers and to unlock the page.
*/
if (fs_info->sectorsize < PAGE_SIZE)
btrfs_subpage_start_reader(fs_info, page, cur, add_size);
put_page(page);
cur += add_size;
}
return 0;
}
@@ -681,9 +799,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
unsigned int compressed_len;
unsigned int nr_pages;
unsigned int pg_index;
struct page *page;
struct bio *comp_bio;
u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
struct bio *comp_bio = NULL;
const u64 disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 cur_disk_byte = disk_bytenr;
u64 next_stripe_start;
u64 file_offset;
u64 em_len;
u64 em_start;
@@ -710,7 +829,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
if (!cb)
goto out;
refcount_set(&cb->pending_bios, 0);
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
cb->errors = 0;
cb->inode = inode;
cb->mirror_num = mirror_num;
@@ -750,86 +869,74 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
/* include any pages we added in add_ra-bio_pages */
cb->len = bio->bi_iter.bi_size;
comp_bio = btrfs_bio_alloc(cur_disk_byte);
comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
refcount_set(&cb->pending_bios, 1);
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
u32 pg_len = PAGE_SIZE;
int submit = 0;
while (cur_disk_byte < disk_bytenr + compressed_len) {
u64 offset = cur_disk_byte - disk_bytenr;
unsigned int index = offset >> PAGE_SHIFT;
unsigned int real_size;
unsigned int added;
struct page *page = cb->compressed_pages[index];
bool submit = false;
/* Allocate new bio if submitted or not yet allocated */
if (!comp_bio) {
comp_bio = alloc_compressed_bio(cb, cur_disk_byte,
REQ_OP_READ, end_compressed_bio_read,
&next_stripe_start);
if (IS_ERR(comp_bio)) {
ret = errno_to_blk_status(PTR_ERR(comp_bio));
comp_bio = NULL;
goto finish_cb;
}
}
/*
* To handle subpage case, we need to make sure the bio only
* covers the range we need.
*
* If we're at the last page, truncate the length to only cover
* the remaining part.
* We should never reach next_stripe_start start as we will
* submit comp_bio when reach the boundary immediately.
*/
if (pg_index == nr_pages - 1)
pg_len = min_t(u32, PAGE_SIZE,
compressed_len - pg_index * PAGE_SIZE);
ASSERT(cur_disk_byte != next_stripe_start);
/*
* We have various limit on the real read size:
* - stripe boundary
* - page boundary
* - compressed length boundary
*/
real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_byte);
real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
real_size = min_t(u64, real_size, compressed_len - offset);
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
page = cb->compressed_pages[pg_index];
page->mapping = inode->i_mapping;
page->index = em_start >> PAGE_SHIFT;
added = bio_add_page(comp_bio, page, real_size, offset_in_page(offset));
/*
* Maximum compressed extent is smaller than bio size limit,
* thus bio_add_page() should always success.
*/
ASSERT(added == real_size);
cur_disk_byte += added;
if (comp_bio->bi_iter.bi_size)
submit = btrfs_bio_fits_in_stripe(page, pg_len,
comp_bio, 0);
/* Reached stripe boundary, need to submit */
if (cur_disk_byte == next_stripe_start)
submit = true;
page->mapping = NULL;
if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) {
/* Has finished the range, need to submit */
if (cur_disk_byte == disk_bytenr + compressed_len)
submit = true;
if (submit) {
unsigned int nr_sectors;
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
* we inc the count. Otherwise, the cb might get
* freed before we're done setting it up
*/
refcount_inc(&cb->pending_bios);
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
BUG_ON(ret); /* -ENOMEM */
if (ret)
goto finish_cb;
nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
fs_info->sectorsize);
sums += fs_info->csum_size * nr_sectors;
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
if (ret) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
comp_bio = btrfs_bio_alloc(cur_disk_byte);
comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
bio_add_page(comp_bio, page, pg_len, 0);
ret = submit_compressed_bio(fs_info, cb, comp_bio, mirror_num);
if (ret)
goto finish_cb;
comp_bio = NULL;
}
cur_disk_byte += pg_len;
}
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
if (ret) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
return 0;
fail2:
@@ -844,6 +951,26 @@ fail1:
out:
free_extent_map(em);
return ret;
finish_cb:
if (comp_bio) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
/* All bytes of @cb is submitted, endio will free @cb */
if (cur_disk_byte == disk_bytenr + compressed_len)
return ret;
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
(disk_bytenr + compressed_len - cur_disk_byte) >>
fs_info->sectorsize_bits);
/*
* Even with previous bio ended, we should still have io not yet
* submitted, thus need to finish @cb manually.
*/
ASSERT(refcount_read(&cb->pending_sectors));
/* Now we are the only one referring @cb, can finish it safely. */
finish_compressed_bio_read(cb, NULL);
return ret;
}
/*

View File

@@ -28,8 +28,8 @@ struct btrfs_inode;
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
struct compressed_bio {
/* number of bios pending for this compressed extent */
refcount_t pending_bios;
/* Number of sectors with unfinished IO (unsubmitted or unfinished) */
refcount_t pending_sectors;
/* Number of compressed pages in the array */
unsigned int nr_pages;

View File

@@ -396,7 +396,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (*cow_ret == buf)
unlock_orig = 1;
btrfs_assert_tree_locked(buf);
btrfs_assert_tree_write_locked(buf);
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != fs_info->running_transaction->transid);
@@ -2488,7 +2488,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
int ret;
BUG_ON(!path->nodes[level]);
btrfs_assert_tree_locked(path->nodes[level]);
btrfs_assert_tree_write_locked(path->nodes[level]);
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
@@ -2828,7 +2828,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (slot >= btrfs_header_nritems(upper) - 1)
return 1;
btrfs_assert_tree_locked(path->nodes[1]);
btrfs_assert_tree_write_locked(path->nodes[1]);
right = btrfs_read_node_slot(upper, slot + 1);
/*
@@ -3066,7 +3066,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (right_nritems == 0)
return 1;
btrfs_assert_tree_locked(path->nodes[1]);
btrfs_assert_tree_write_locked(path->nodes[1]);
left = btrfs_read_node_slot(path->nodes[1], slot - 1);
/*
@@ -3581,40 +3581,6 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
return ret;
}
/*
* This function duplicate a item, giving 'new_key' to the new item.
* It guarantees both items live in the same tree leaf and the new item
* is contiguous with the original item.
*
* This allows us to split file extent in place, keeping a lock on the
* leaf the entire time.
*/
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_key *new_key)
{
struct extent_buffer *leaf;
int ret;
u32 item_size;
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
ret = setup_leaf_for_split(trans, root, path,
item_size + sizeof(struct btrfs_item));
if (ret)
return ret;
path->slots[0]++;
setup_items_for_insert(root, path, new_key, &item_size, 1);
leaf = path->nodes[0];
memcpy_extent_buffer(leaf,
btrfs_item_ptr_offset(leaf, path->slots[0]),
btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
item_size);
return 0;
}
/*
* make the item pointed to by the path smaller. new_size indicates
* how small to make it, and from_end tells us if we just chop bytes
@@ -3786,13 +3752,10 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
*
* @root: root we are inserting items to
* @path: points to the leaf/slot where we are going to insert new items
* @cpu_key: array of keys for items to be inserted
* @data_size: size of the body of each item we are going to insert
* @nr: size of @cpu_key/@data_size arrays
* @batch: information about the batch of items to insert
*/
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
const struct btrfs_key *cpu_key, u32 *data_size,
int nr)
static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
const struct btrfs_item_batch *batch)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_item *item;
@@ -3804,14 +3767,14 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
int slot;
struct btrfs_map_token token;
u32 total_size;
u32 total_data = 0;
for (i = 0; i < nr; i++)
total_data += data_size[i];
total_size = total_data + (nr * sizeof(struct btrfs_item));
/*
* Before anything else, update keys in the parent and other ancestors
* if needed, then release the write locks on them, so that other tasks
* can use them while we modify the leaf.
*/
if (path->slots[0] == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]);
fixup_low_keys(path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
@@ -3821,6 +3784,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
nritems = btrfs_header_nritems(leaf);
data_end = leaf_data_end(leaf);
total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
if (btrfs_leaf_free_space(leaf) < total_size) {
btrfs_print_leaf(leaf);
@@ -3850,31 +3814,32 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(&token, item);
btrfs_set_token_item_offset(&token, item,
ioff - total_data);
ioff - batch->total_data_size);
}
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr),
btrfs_item_nr_offset(slot),
(nritems - slot) * sizeof(struct btrfs_item));
/* shift the data */
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
data_end, old_data - data_end);
data_end - batch->total_data_size,
BTRFS_LEAF_DATA_OFFSET + data_end,
old_data - data_end);
data_end = old_data;
}
/* setup the item for the new data */
for (i = 0; i < nr; i++) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
for (i = 0; i < batch->nr; i++) {
btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
btrfs_set_item_key(leaf, &disk_key, slot + i);
item = btrfs_item_nr(slot + i);
data_end -= data_size[i];
data_end -= batch->data_sizes[i];
btrfs_set_token_item_offset(&token, item, data_end);
btrfs_set_token_item_size(&token, item, data_size[i]);
btrfs_set_token_item_size(&token, item, batch->data_sizes[i]);
}
btrfs_set_header_nritems(leaf, nritems + nr);
btrfs_set_header_nritems(leaf, nritems + batch->nr);
btrfs_mark_buffer_dirty(leaf);
if (btrfs_leaf_free_space(leaf) < 0) {
@@ -3883,6 +3848,29 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
}
}
/*
* Insert a new item into a leaf.
*
* @root: The root of the btree.
* @path: A path pointing to the target leaf and slot.
* @key: The key of the new item.
* @data_size: The size of the data associated with the new key.
*/
void btrfs_setup_item_for_insert(struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_key *key,
u32 data_size)
{
struct btrfs_item_batch batch;
batch.keys = key;
batch.data_sizes = &data_size;
batch.total_data_size = data_size;
batch.nr = 1;
setup_items_for_insert(root, path, &batch);
}
/*
* Given a key and some data, insert items into the tree.
* This does all the path init required, making room in the tree if needed.
@@ -3890,20 +3878,14 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_key *cpu_key, u32 *data_size,
int nr)
const struct btrfs_item_batch *batch)
{
int ret = 0;
int slot;
int i;
u32 total_size = 0;
u32 total_data = 0;
u32 total_size;
for (i = 0; i < nr; i++)
total_data += data_size[i];
total_size = total_data + (nr * sizeof(struct btrfs_item));
ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
ret = btrfs_search_slot(trans, root, &batch->keys[0], path, total_size, 1);
if (ret == 0)
return -EEXIST;
if (ret < 0)
@@ -3912,7 +3894,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
slot = path->slots[0];
BUG_ON(slot < 0);
setup_items_for_insert(root, path, cpu_key, data_size, nr);
setup_items_for_insert(root, path, batch);
return 0;
}
@@ -3943,6 +3925,40 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return ret;
}
/*
* This function duplicates an item, giving 'new_key' to the new item.
* It guarantees both items live in the same tree leaf and the new item is
* contiguous with the original item.
*
* This allows us to split a file extent in place, keeping a lock on the leaf
* the entire time.
*/
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_key *new_key)
{
struct extent_buffer *leaf;
int ret;
u32 item_size;
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
ret = setup_leaf_for_split(trans, root, path,
item_size + sizeof(struct btrfs_item));
if (ret)
return ret;
path->slots[0]++;
btrfs_setup_item_for_insert(root, path, new_key, item_size);
leaf = path->nodes[0];
memcpy_extent_buffer(leaf,
btrfs_item_ptr_offset(leaf, path->slots[0]),
btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
item_size);
return 0;
}
/*
* delete the pointer from a given node.
*

View File

@@ -48,6 +48,7 @@ extern struct kmem_cache *btrfs_free_space_cachep;
extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
struct btrfs_ordered_sum;
struct btrfs_ref;
struct btrfs_bio;
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
@@ -217,6 +218,9 @@ struct btrfs_root_backup {
u8 unused_8[10];
} __attribute__ ((__packed__));
#define BTRFS_SUPER_INFO_OFFSET SZ_64K
#define BTRFS_SUPER_INFO_SIZE 4096
/*
* the super block basically lists the main trees of the FS
* it currently lacks any block count etc etc
@@ -269,7 +273,11 @@ struct btrfs_super_block {
__le64 reserved[28];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
/* Padded to 4096 bytes */
u8 padding[565];
} __attribute__ ((__packed__));
static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
/*
* Compat flags that we support. If any incompat flags are set other than the
@@ -899,6 +907,7 @@ struct btrfs_fs_info {
struct btrfs_workqueue *scrub_workers;
struct btrfs_workqueue *scrub_wr_completion_workers;
struct btrfs_workqueue *scrub_parity_workers;
struct btrfs_subpage_info *subpage_info;
struct btrfs_discard_ctl discard_ctl;
@@ -1017,6 +1026,16 @@ struct btrfs_fs_info {
spinlock_t treelog_bg_lock;
u64 treelog_bg;
/*
* Start of the dedicated data relocation block group, protected by
* relocation_bg_lock.
*/
spinlock_t relocation_bg_lock;
u64 data_reloc_bg;
spinlock_t zone_active_bgs_lock;
struct list_head zone_active_bgs;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
@@ -2885,16 +2904,42 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
return btrfs_del_items(trans, root, path, path->slots[0], 1);
}
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
const struct btrfs_key *cpu_key, u32 *data_size,
int nr);
/*
* Describes a batch of items to insert in a btree. This is used by
* btrfs_insert_empty_items().
*/
struct btrfs_item_batch {
/*
* Pointer to an array containing the keys of the items to insert (in
* sorted order).
*/
const struct btrfs_key *keys;
/* Pointer to an array containing the data size for each item to insert. */
const u32 *data_sizes;
/*
* The sum of data sizes for all items. The caller can compute this while
* setting up the data_sizes array, so it ends up being more efficient
* than having btrfs_insert_empty_items() or setup_item_for_insert()
* doing it, as it would avoid an extra loop over a potentially large
* array, and in the case of setup_item_for_insert(), we would be doing
* it while holding a write lock on a leaf and often on upper level nodes
* too, unnecessarily increasing the size of a critical section.
*/
u32 total_data_size;
/* Size of the keys and data_sizes arrays (number of items in the batch). */
int nr;
};
void btrfs_setup_item_for_insert(struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_key *key,
u32 data_size);
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key, void *data, u32 data_size);
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
const struct btrfs_key *cpu_key, u32 *data_size,
int nr);
const struct btrfs_item_batch *batch);
static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -2902,7 +2947,14 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
const struct btrfs_key *key,
u32 data_size)
{
return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
struct btrfs_item_batch batch;
batch.keys = key;
batch.data_sizes = &data_size;
batch.total_data_size = data_size;
batch.nr = 1;
return btrfs_insert_empty_items(trans, root, path, &batch);
}
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
@@ -3129,8 +3181,9 @@ u64 btrfs_file_extent_end(const struct btrfs_path *path);
/* inode.c */
blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
struct page *page, u64 start, u64 end);
unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
u32 bio_offset, struct page *page,
u64 start, u64 end);
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
u64 start, u64 len);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
@@ -3142,7 +3195,6 @@ void __btrfs_del_delalloc_inode(struct btrfs_root *root,
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *dir, struct btrfs_inode *inode,
const char *name, int name_len);
int btrfs_add_link(struct btrfs_trans_handle *trans,
@@ -3174,8 +3226,6 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
struct extent_state *other);
void btrfs_split_delalloc_extent(struct inode *inode,
struct extent_state *orig, u64 split);
int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
unsigned long bio_flags);
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
@@ -3242,9 +3292,9 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int __pure btrfs_is_empty_uuid(u8 *uuid);
int btrfs_defrag_file(struct inode *inode, struct file *file,
int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_pages);
u64 newer_than, unsigned long max_to_defrag);
void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
@@ -3563,6 +3613,9 @@ do { \
(errno), fmt, ##args); \
} while (0)
#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
&(fs_info)->fs_state)))
__printf(5, 6)
__cold
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
@@ -3842,6 +3895,11 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
return fs_info->zoned != 0;
}
static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
{
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
}
/*
* We use page status Private2 to indicate there is an ordered extent with
* unfinished IO.

View File

@@ -679,19 +679,18 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_item *first_item)
{
LIST_HEAD(batch);
LIST_HEAD(item_list);
struct btrfs_delayed_item *curr;
struct btrfs_delayed_item *next;
const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info);
struct btrfs_item_batch batch;
int total_size;
int nitems;
char *ins_data = NULL;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
int ret;
list_add_tail(&first_item->tree_list, &batch);
nitems = 1;
list_add_tail(&first_item->tree_list, &item_list);
batch.total_data_size = first_item->data_len;
batch.nr = 1;
total_size = first_item->data_len + sizeof(struct btrfs_item);
curr = first_item;
@@ -706,39 +705,43 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
if (total_size + next_size > max_size)
break;
list_add_tail(&next->tree_list, &batch);
nitems++;
list_add_tail(&next->tree_list, &item_list);
batch.nr++;
total_size += next_size;
batch.total_data_size += next->data_len;
curr = next;
}
if (nitems == 1) {
ins_keys = &first_item->key;
ins_sizes = &first_item->data_len;
if (batch.nr == 1) {
batch.keys = &first_item->key;
batch.data_sizes = &first_item->data_len;
} else {
struct btrfs_key *ins_keys;
u32 *ins_sizes;
int i = 0;
ins_data = kmalloc(nitems * sizeof(u32) +
nitems * sizeof(struct btrfs_key), GFP_NOFS);
ins_data = kmalloc(batch.nr * sizeof(u32) +
batch.nr * sizeof(struct btrfs_key), GFP_NOFS);
if (!ins_data) {
ret = -ENOMEM;
goto out;
}
ins_sizes = (u32 *)ins_data;
ins_keys = (struct btrfs_key *)(ins_data + nitems * sizeof(u32));
list_for_each_entry(curr, &batch, tree_list) {
ins_keys = (struct btrfs_key *)(ins_data + batch.nr * sizeof(u32));
batch.keys = ins_keys;
batch.data_sizes = ins_sizes;
list_for_each_entry(curr, &item_list, tree_list) {
ins_keys[i] = curr->key;
ins_sizes[i] = curr->data_len;
i++;
}
}
ret = btrfs_insert_empty_items(trans, root, path, ins_keys, ins_sizes,
nitems);
ret = btrfs_insert_empty_items(trans, root, path, &batch);
if (ret)
goto out;
list_for_each_entry(curr, &batch, tree_list) {
list_for_each_entry(curr, &item_list, tree_list) {
char *data_ptr;
data_ptr = btrfs_item_ptr(path->nodes[0], path->slots[0], char);
@@ -754,7 +757,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
*/
btrfs_release_path(path);
list_for_each_entry_safe(curr, next, &batch, tree_list) {
list_for_each_entry_safe(curr, next, &item_list, tree_list) {
list_del(&curr->tree_list);
btrfs_delayed_item_release_metadata(root, curr);
btrfs_release_delayed_item(curr);

View File

@@ -906,7 +906,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 parent = generic_ref->parent;
u8 ref_type;
is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);
is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID);
ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
BUG_ON(extent_op && extent_op->is_data);
@@ -921,8 +921,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
}
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
is_fstree(generic_ref->real_root) &&
is_fstree(generic_ref->tree_ref.root) &&
!generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
@@ -938,14 +936,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
ref_type = BTRFS_TREE_BLOCK_REF_KEY;
init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
generic_ref->tree_ref.root, action, ref_type);
ref->root = generic_ref->tree_ref.root;
generic_ref->tree_ref.owning_root, action,
ref_type);
ref->root = generic_ref->tree_ref.owning_root;
ref->parent = parent;
ref->level = level;
init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
generic_ref->tree_ref.root, 0, action, false,
is_system);
generic_ref->tree_ref.owning_root, 0, action,
false, is_system);
head_ref->extent_op = extent_op;
delayed_refs = &trans->transaction->delayed_refs;
@@ -997,7 +996,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
u64 ref_root = generic_ref->data_ref.ref_root;
u64 ref_root = generic_ref->data_ref.owning_root;
u64 owner = generic_ref->data_ref.ino;
u64 offset = generic_ref->data_ref.offset;
u8 ref_type;
@@ -1026,8 +1025,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
}
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
is_fstree(ref_root) &&
is_fstree(generic_ref->real_root) &&
!generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {

View File

@@ -186,8 +186,8 @@ enum btrfs_ref_type {
struct btrfs_data_ref {
/* For EXTENT_DATA_REF */
/* Root which refers to this data extent */
u64 ref_root;
/* Original root this data extent belongs to */
u64 owning_root;
/* Inode which refers to this data extent */
u64 ino;
@@ -210,11 +210,11 @@ struct btrfs_tree_ref {
int level;
/*
* Root which refers to this tree block.
* Root which owns this tree block.
*
* For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
*/
u64 root;
u64 owning_root;
/* For non-skinny metadata, no special member needed */
};
@@ -231,17 +231,10 @@ struct btrfs_ref {
*/
bool skip_qgroup;
/*
* Optional. For which root is this modification.
* Mostly used for qgroup optimization.
*
* When unset, data/tree ref init code will populate it.
* In certain cases, we're modifying reference for a different root.
* E.g. COW fs tree blocks for balance.
* In that case, tree_ref::root will be fs tree, but we're doing this
* for reloc tree, then we should set @real_root to reloc tree.
*/
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
/* Through which root is this modification. */
u64 real_root;
#endif
u64 bytenr;
u64 len;
@@ -271,26 +264,40 @@ static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
}
static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
int level, u64 root)
int level, u64 root, u64 mod_root, bool skip_qgroup)
{
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
/* If @real_root not set, use @root as fallback */
if (!generic_ref->real_root)
generic_ref->real_root = root;
generic_ref->real_root = mod_root ?: root;
#endif
generic_ref->tree_ref.level = level;
generic_ref->tree_ref.root = root;
generic_ref->tree_ref.owning_root = root;
generic_ref->type = BTRFS_REF_METADATA;
if (skip_qgroup || !(is_fstree(root) &&
(!mod_root || is_fstree(mod_root))))
generic_ref->skip_qgroup = true;
else
generic_ref->skip_qgroup = false;
}
static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
u64 ref_root, u64 ino, u64 offset)
u64 ref_root, u64 ino, u64 offset, u64 mod_root,
bool skip_qgroup)
{
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
/* If @real_root not set, use @root as fallback */
if (!generic_ref->real_root)
generic_ref->real_root = ref_root;
generic_ref->data_ref.ref_root = ref_root;
generic_ref->real_root = mod_root ?: ref_root;
#endif
generic_ref->data_ref.owning_root = ref_root;
generic_ref->data_ref.ino = ino;
generic_ref->data_ref.offset = offset;
generic_ref->type = BTRFS_REF_DATA;
if (skip_qgroup || !(is_fstree(ref_root) &&
(!mod_root || is_fstree(mod_root))))
generic_ref->skip_qgroup = true;
else
generic_ref->skip_qgroup = false;
}
static inline struct btrfs_delayed_extent_op *

View File

@@ -70,6 +70,7 @@ static int btrfs_dev_replace_kthread(void *data);
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
{
struct btrfs_dev_lookup_args args = { .devid = BTRFS_DEV_REPLACE_DEVID };
struct btrfs_key key;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
@@ -100,8 +101,7 @@ no_valid_dev_replace_entry_found:
* We don't have a replace item or it's corrupted. If there is
* a replace target, fail the mount.
*/
if (btrfs_find_device(fs_info->fs_devices,
BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
if (btrfs_find_device(fs_info->fs_devices, &args)) {
btrfs_err(fs_info,
"found replace target device without a valid replace item");
ret = -EUCLEAN;
@@ -163,8 +163,7 @@ no_valid_dev_replace_entry_found:
* We don't have an active replace item but if there is a
* replace target, fail the mount.
*/
if (btrfs_find_device(fs_info->fs_devices,
BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
if (btrfs_find_device(fs_info->fs_devices, &args)) {
btrfs_err(fs_info,
"replace devid present without an active replace item");
ret = -EUCLEAN;
@@ -175,11 +174,10 @@ no_valid_dev_replace_entry_found:
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices,
src_devid, NULL, NULL);
dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices,
BTRFS_DEV_REPLACE_DEVID,
NULL, NULL);
dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, &args);
args.devid = src_devid;
dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, &args);
/*
* allow 'btrfs dev replace_cancel' if src/tgt device is
* missing

View File

@@ -683,7 +683,7 @@ err:
return ret;
}
int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
struct page *page, u64 start, u64 end,
int mirror)
{
@@ -1036,7 +1036,7 @@ static int btree_set_page_dirty(struct page *page)
BUG_ON(!eb);
BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
BUG_ON(!atomic_read(&eb->refs));
btrfs_assert_tree_locked(eb);
btrfs_assert_tree_write_locked(eb);
return __set_page_dirty_nobuffers(page);
}
ASSERT(PagePrivate(page) && page->private);
@@ -1061,7 +1061,7 @@ static int btree_set_page_dirty(struct page *page)
ASSERT(eb);
ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
ASSERT(atomic_read(&eb->refs));
btrfs_assert_tree_locked(eb);
btrfs_assert_tree_write_locked(eb);
free_extent_buffer(eb);
cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
@@ -1125,7 +1125,7 @@ void btrfs_clean_tree_block(struct extent_buffer *buf)
struct btrfs_fs_info *fs_info = buf->fs_info;
if (btrfs_header_generation(buf) ==
fs_info->running_transaction->transid) {
btrfs_assert_tree_locked(buf);
btrfs_assert_tree_write_locked(buf);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
@@ -1500,7 +1500,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
goto fail;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
!btrfs_is_data_reloc_root(root)) {
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1644,6 +1644,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
kfree(fs_info->subpage_info);
kvfree(fs_info);
}
@@ -1953,8 +1954,7 @@ sleep:
wake_up_process(fs_info->cleaner_kthread);
mutex_unlock(&fs_info->transaction_kthread_mutex);
if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
&fs_info->fs_state)))
if (BTRFS_FS_ERROR(fs_info))
btrfs_cleanup_transaction(fs_info);
if (!kthread_should_stop() &&
(!btrfs_transaction_blocked(fs_info) ||
@@ -2592,8 +2592,7 @@ static int validate_super(struct btrfs_fs_info *fs_info,
/*
* For 4K page size, we only support 4K sector size.
* For 64K page size, we support read-write for 64K sector size, and
* read-only for 4K sector size.
* For 64K page size, we support 64K and 4K sector sizes.
*/
if ((PAGE_SIZE == SZ_4K && sectorsize != PAGE_SIZE) ||
(PAGE_SIZE == SZ_64K && (sectorsize != SZ_4K &&
@@ -2883,6 +2882,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
spin_lock_init(&fs_info->treelog_bg_lock);
spin_lock_init(&fs_info->zone_active_bgs_lock);
spin_lock_init(&fs_info->relocation_bg_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
mutex_init(&fs_info->reclaim_bgs_lock);
@@ -2896,6 +2897,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
INIT_LIST_HEAD(&fs_info->unused_bgs);
INIT_LIST_HEAD(&fs_info->reclaim_bgs);
INIT_LIST_HEAD(&fs_info->zone_active_bgs);
#ifdef CONFIG_BTRFS_DEBUG
INIT_LIST_HEAD(&fs_info->allocated_roots);
INIT_LIST_HEAD(&fs_info->allocated_ebs);
@@ -3228,12 +3230,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
btrfs_init_btree_inode(fs_info);
invalidate_bdev(fs_devices->latest_bdev);
invalidate_bdev(fs_devices->latest_dev->bdev);
/*
* Read super block and check the signature bytes only
*/
disk_super = btrfs_read_dev_super(fs_devices->latest_bdev);
disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
if (IS_ERR(disk_super)) {
err = PTR_ERR(disk_super);
goto fail_alloc;
@@ -3392,12 +3394,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_alloc;
}
if (sectorsize != PAGE_SIZE) {
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
btrfs_warn(fs_info,
"read-write for sector size %u with page size %lu is experimental",
sectorsize, PAGE_SIZE);
}
if (sectorsize != PAGE_SIZE) {
if (btrfs_super_incompat_flags(fs_info->super_copy) &
BTRFS_FEATURE_INCOMPAT_RAID56) {
btrfs_err(fs_info,
@@ -3406,6 +3408,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
err = -EINVAL;
goto fail_alloc;
}
subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
if (!subpage_info)
goto fail_alloc;
btrfs_init_subpage_info(subpage_info, sectorsize);
fs_info->subpage_info = subpage_info;
}
ret = btrfs_init_workqueues(fs_info, fs_devices);
@@ -3465,7 +3472,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* below in btrfs_init_dev_replace().
*/
btrfs_free_extra_devids(fs_devices);
if (!fs_devices->latest_bdev) {
if (!fs_devices->latest_dev->bdev) {
btrfs_err(fs_info, "failed to read devices");
goto fail_tree_roots;
}
@@ -3556,7 +3563,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_sysfs;
}
if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
!btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
"writable mount is not allowed due to too many missing devices");
goto fail_sysfs;
@@ -3881,7 +3889,9 @@ static int write_dev_supers(struct btrfs_device *device,
bio->bi_opf |= REQ_FUA;
btrfsic_submit_bio(bio);
btrfs_advance_sb_log(device, i);
if (btrfs_advance_sb_log(device, i))
errors++;
}
return errors < i ? 0 : -1;
}
@@ -4221,7 +4231,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
drop_ref = true;
spin_unlock(&fs_info->fs_roots_radix_lock);
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
if (BTRFS_FS_ERROR(fs_info)) {
ASSERT(root->log_root == NULL);
if (root->reloc_root) {
btrfs_put_root(root->reloc_root);
@@ -4372,8 +4382,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "commit super ret %d", ret);
}
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state) ||
test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state))
if (BTRFS_FS_ERROR(fs_info))
btrfs_error_commit_super(fs_info);
kthread_stop(fs_info->transaction_kthread);
@@ -4470,7 +4479,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
return;
#endif
btrfs_assert_tree_locked(buf);
btrfs_assert_tree_write_locked(buf);
if (transid != fs_info->generation)
WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n",
buf->start, transid, fs_info->generation);

View File

@@ -6,9 +6,6 @@
#ifndef BTRFS_DISK_IO_H
#define BTRFS_DISK_IO_H
#define BTRFS_SUPER_INFO_OFFSET SZ_64K
#define BTRFS_SUPER_INFO_SIZE 4096
#define BTRFS_SUPER_MIRROR_MAX 3
#define BTRFS_SUPER_MIRROR_SHIFT 12
@@ -81,7 +78,7 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
struct page *page, u64 start, u64 end,
int mirror);
blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,

View File

@@ -1266,7 +1266,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
return ret;
}
static int do_discard_extent(struct btrfs_bio_stripe *stripe, u64 *bytes)
static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
{
struct btrfs_device *dev = stripe->dev;
struct btrfs_fs_info *fs_info = dev->fs_info;
@@ -1313,22 +1313,21 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 discarded_bytes = 0;
u64 end = bytenr + num_bytes;
u64 cur = bytenr;
struct btrfs_bio *bbio = NULL;
struct btrfs_io_context *bioc = NULL;
/*
* Avoid races with device replace and make sure our bbio has devices
* Avoid races with device replace and make sure our bioc has devices
* associated to its stripes that don't go away while we are discarding.
*/
btrfs_bio_counter_inc_blocked(fs_info);
while (cur < end) {
struct btrfs_bio_stripe *stripe;
struct btrfs_io_stripe *stripe;
int i;
num_bytes = end - cur;
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
&num_bytes, &bbio, 0);
&num_bytes, &bioc, 0);
/*
* Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
* -EOPNOTSUPP. For any such error, @num_bytes is not updated,
@@ -1337,8 +1336,8 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
if (ret < 0)
goto out;
stripe = bbio->stripes;
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
stripe = bioc->stripes;
for (i = 0; i < bioc->num_stripes; i++, stripe++) {
u64 bytes;
struct btrfs_device *device = stripe->dev;
@@ -1361,7 +1360,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
* And since there are two loops, explicitly
* go to out to avoid confusion.
*/
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
goto out;
}
@@ -1372,7 +1371,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
*/
ret = 0;
}
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
cur += num_bytes;
}
out:
@@ -1397,7 +1396,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
generic_ref->action);
BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
if (generic_ref->type == BTRFS_REF_METADATA)
ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
@@ -2376,7 +2375,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
out:
btrfs_free_path(path);
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
if (btrfs_is_data_reloc_root(root))
WARN_ON(ret > 0);
return ret;
}
@@ -2438,10 +2437,9 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
btrfs_init_generic_ref(&generic_ref, action, bytenr,
num_bytes, parent);
generic_ref.real_root = root->root_key.objectid;
btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
key.offset);
generic_ref.skip_qgroup = for_reloc;
key.offset, root->root_key.objectid,
for_reloc);
if (inc)
ret = btrfs_inc_extent_ref(trans, &generic_ref);
else
@@ -2453,9 +2451,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
num_bytes = fs_info->nodesize;
btrfs_init_generic_ref(&generic_ref, action, bytenr,
num_bytes, parent);
generic_ref.real_root = root->root_key.objectid;
btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
generic_ref.skip_qgroup = for_reloc;
btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
root->root_key.objectid, for_reloc);
if (inc)
ret = btrfs_inc_extent_ref(trans, &generic_ref);
else
@@ -3196,7 +3193,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
goto out;
}
ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3289,7 +3286,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
buf->start, buf->len, parent);
btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
root->root_key.objectid);
root->root_key.objectid, 0, false);
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
btrfs_ref_tree_mod(fs_info, &generic_ref);
@@ -3373,9 +3370,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
* tree, just update pinning info and exit early.
*/
if ((ref->type == BTRFS_REF_METADATA &&
ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
(ref->type == BTRFS_REF_DATA &&
ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
/* unlocks the pinned mutex */
btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
ret = 0;
@@ -3386,9 +3383,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
}
if (!((ref->type == BTRFS_REF_METADATA &&
ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
(ref->type == BTRFS_REF_DATA &&
ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
btrfs_ref_tree_mod(fs_info, ref);
return ret;
@@ -3476,7 +3473,9 @@ enum btrfs_extent_allocation_policy {
*/
struct find_free_extent_ctl {
/* Basic allocation info */
u64 ram_bytes;
u64 num_bytes;
u64 min_alloc_size;
u64 empty_size;
u64 flags;
int delalloc;
@@ -3495,6 +3494,9 @@ struct find_free_extent_ctl {
/* Allocation is called for tree-log */
bool for_treelog;
/* Allocation is called for data relocation */
bool for_data_reloc;
/* RAID index, converted from flags */
int index;
@@ -3756,8 +3758,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
u64 avail;
u64 bytenr = block_group->start;
u64 log_bytenr;
u64 data_reloc_bytenr;
int ret = 0;
bool skip;
bool skip = false;
ASSERT(btrfs_is_zoned(block_group->fs_info));
@@ -3767,19 +3770,49 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
*/
spin_lock(&fs_info->treelog_bg_lock);
log_bytenr = fs_info->treelog_bg;
skip = log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
(!ffe_ctl->for_treelog && bytenr == log_bytenr));
if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
(!ffe_ctl->for_treelog && bytenr == log_bytenr)))
skip = true;
spin_unlock(&fs_info->treelog_bg_lock);
if (skip)
return 1;
/*
* Do not allow non-relocation blocks in the dedicated relocation block
* group, and vice versa.
*/
spin_lock(&fs_info->relocation_bg_lock);
data_reloc_bytenr = fs_info->data_reloc_bg;
if (data_reloc_bytenr &&
((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
(!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
skip = true;
spin_unlock(&fs_info->relocation_bg_lock);
if (skip)
return 1;
/* Check RO and no space case before trying to activate it */
spin_lock(&block_group->lock);
if (block_group->ro ||
block_group->alloc_offset == block_group->zone_capacity) {
spin_unlock(&block_group->lock);
return 1;
}
spin_unlock(&block_group->lock);
if (!btrfs_zone_activate(block_group))
return 1;
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
spin_lock(&fs_info->treelog_bg_lock);
spin_lock(&fs_info->relocation_bg_lock);
ASSERT(!ffe_ctl->for_treelog ||
block_group->start == fs_info->treelog_bg ||
fs_info->treelog_bg == 0);
ASSERT(!ffe_ctl->for_data_reloc ||
block_group->start == fs_info->data_reloc_bg ||
fs_info->data_reloc_bg == 0);
if (block_group->ro) {
ret = 1;
@@ -3796,7 +3829,18 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
goto out;
}
avail = block_group->length - block_group->alloc_offset;
/*
* Do not allow currently used block group to be the data relocation
* dedicated block group.
*/
if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
(block_group->used || block_group->reserved)) {
ret = 1;
goto out;
}
WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
avail = block_group->zone_capacity - block_group->alloc_offset;
if (avail < num_bytes) {
if (ffe_ctl->max_extent_size < avail) {
/*
@@ -3813,6 +3857,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
fs_info->treelog_bg = block_group->start;
if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
fs_info->data_reloc_bg = block_group->start;
ffe_ctl->found_offset = start + block_group->alloc_offset;
block_group->alloc_offset += num_bytes;
spin_lock(&ctl->tree_lock);
@@ -3829,6 +3876,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
out:
if (ret && ffe_ctl->for_treelog)
fs_info->treelog_bg = 0;
if (ret && ffe_ctl->for_data_reloc)
fs_info->data_reloc_bg = 0;
spin_unlock(&fs_info->relocation_bg_lock);
spin_unlock(&fs_info->treelog_bg_lock);
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
@@ -3932,18 +3982,30 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
ffe_ctl->orig_have_caching_bg = true;
if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
ffe_ctl->have_caching_bg)
return 1;
if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
return 1;
if (ins->objectid) {
found_extent(ffe_ctl, ins);
return 0;
}
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
!btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->index)) {
/*
* If we have enough free space left in an already active block
* group and we can't activate any other zone now, retry the
* active ones with a smaller allocation size. Returning early
* from here will tell btrfs_reserve_extent() to haven the
* size.
*/
return -ENOSPC;
}
if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
return 1;
ffe_ctl->index++;
if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
return 1;
/*
* LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
* caching kthreads as we move along
@@ -4085,6 +4147,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
ffe_ctl->hint_byte = fs_info->treelog_bg;
spin_unlock(&fs_info->treelog_bg_lock);
}
if (ffe_ctl->for_data_reloc) {
spin_lock(&fs_info->relocation_bg_lock);
if (fs_info->data_reloc_bg)
ffe_ctl->hint_byte = fs_info->data_reloc_bg;
spin_unlock(&fs_info->relocation_bg_lock);
}
return 0;
default:
BUG();
@@ -4117,65 +4185,62 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
* |- If not found, re-iterate all block groups
*/
static noinline int find_free_extent(struct btrfs_root *root,
u64 ram_bytes, u64 num_bytes, u64 empty_size,
u64 hint_byte_orig, struct btrfs_key *ins,
u64 flags, int delalloc)
struct btrfs_key *ins,
struct find_free_extent_ctl *ffe_ctl)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
int cache_block_group_error = 0;
struct btrfs_block_group *block_group = NULL;
struct find_free_extent_ctl ffe_ctl = {0};
struct btrfs_space_info *space_info;
bool full_search = false;
bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
WARN_ON(num_bytes < fs_info->sectorsize);
ffe_ctl.num_bytes = num_bytes;
ffe_ctl.empty_size = empty_size;
ffe_ctl.flags = flags;
ffe_ctl.search_start = 0;
ffe_ctl.delalloc = delalloc;
ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
ffe_ctl.have_caching_bg = false;
ffe_ctl.orig_have_caching_bg = false;
ffe_ctl.found_offset = 0;
ffe_ctl.hint_byte = hint_byte_orig;
ffe_ctl.for_treelog = for_treelog;
ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
ffe_ctl->search_start = 0;
/* For clustered allocation */
ffe_ctl.retry_clustered = false;
ffe_ctl.retry_unclustered = false;
ffe_ctl.last_ptr = NULL;
ffe_ctl.use_cluster = true;
ffe_ctl->empty_cluster = 0;
ffe_ctl->last_ptr = NULL;
ffe_ctl->use_cluster = true;
ffe_ctl->have_caching_bg = false;
ffe_ctl->orig_have_caching_bg = false;
ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
ffe_ctl->loop = 0;
/* For clustered allocation */
ffe_ctl->retry_clustered = false;
ffe_ctl->retry_unclustered = false;
ffe_ctl->cached = 0;
ffe_ctl->max_extent_size = 0;
ffe_ctl->total_free_space = 0;
ffe_ctl->found_offset = 0;
ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
if (btrfs_is_zoned(fs_info))
ffe_ctl.policy = BTRFS_EXTENT_ALLOC_ZONED;
ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
ins->type = BTRFS_EXTENT_ITEM_KEY;
ins->objectid = 0;
ins->offset = 0;
trace_find_free_extent(root, num_bytes, empty_size, flags);
trace_find_free_extent(root, ffe_ctl->num_bytes, ffe_ctl->empty_size,
ffe_ctl->flags);
space_info = btrfs_find_space_info(fs_info, flags);
space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
if (!space_info) {
btrfs_err(fs_info, "No space info for %llu", flags);
btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
return -ENOSPC;
}
ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins);
ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
if (ret < 0)
return ret;
ffe_ctl.search_start = max(ffe_ctl.search_start,
first_logical_byte(fs_info, 0));
ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte);
if (ffe_ctl.search_start == ffe_ctl.hint_byte) {
ffe_ctl->search_start = max(ffe_ctl->search_start,
first_logical_byte(fs_info, 0));
ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
block_group = btrfs_lookup_block_group(fs_info,
ffe_ctl.search_start);
ffe_ctl->search_start);
/*
* we don't want to use the block group if it doesn't match our
* allocation bits, or if its not cached.
@@ -4183,7 +4248,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
* However if we are re-searching with an ideal block group
* picked out then we don't care that the block group is cached.
*/
if (block_group && block_group_bits(block_group, flags) &&
if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
block_group->cached != BTRFS_CACHE_NO) {
down_read(&space_info->groups_sem);
if (list_empty(&block_group->list) ||
@@ -4197,9 +4262,10 @@ static noinline int find_free_extent(struct btrfs_root *root,
btrfs_put_block_group(block_group);
up_read(&space_info->groups_sem);
} else {
ffe_ctl.index = btrfs_bg_flags_to_raid_index(
block_group->flags);
btrfs_lock_block_group(block_group, delalloc);
ffe_ctl->index = btrfs_bg_flags_to_raid_index(
block_group->flags);
btrfs_lock_block_group(block_group,
ffe_ctl->delalloc);
goto have_block_group;
}
} else if (block_group) {
@@ -4207,31 +4273,33 @@ static noinline int find_free_extent(struct btrfs_root *root,
}
}
search:
ffe_ctl.have_caching_bg = false;
if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
ffe_ctl.index == 0)
ffe_ctl->have_caching_bg = false;
if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
ffe_ctl->index == 0)
full_search = true;
down_read(&space_info->groups_sem);
list_for_each_entry(block_group,
&space_info->block_groups[ffe_ctl.index], list) {
&space_info->block_groups[ffe_ctl->index], list) {
struct btrfs_block_group *bg_ret;
/* If the block group is read-only, we can skip it entirely. */
if (unlikely(block_group->ro)) {
if (for_treelog)
if (ffe_ctl->for_treelog)
btrfs_clear_treelog_bg(block_group);
if (ffe_ctl->for_data_reloc)
btrfs_clear_data_reloc_bg(block_group);
continue;
}
btrfs_grab_block_group(block_group, delalloc);
ffe_ctl.search_start = block_group->start;
btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
ffe_ctl->search_start = block_group->start;
/*
* this can happen if we end up cycling through all the
* raid types, but we want to make sure we only allocate
* for the proper type.
*/
if (!block_group_bits(block_group, flags)) {
if (!block_group_bits(block_group, ffe_ctl->flags)) {
u64 extra = BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_RAID56_MASK |
@@ -4242,7 +4310,7 @@ search:
* doesn't provide them, bail. This does allow us to
* fill raid0 from raid1.
*/
if ((flags & extra) && !(block_group->flags & extra))
if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
goto loop;
/*
@@ -4250,14 +4318,14 @@ search:
* It's possible that we have MIXED_GROUP flag but no
* block group is mixed. Just skip such block group.
*/
btrfs_release_block_group(block_group, delalloc);
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
continue;
}
have_block_group:
ffe_ctl.cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl.cached)) {
ffe_ctl.have_caching_bg = true;
ffe_ctl->cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl->cached)) {
ffe_ctl->have_caching_bg = true;
ret = btrfs_cache_block_group(block_group, 0);
/*
@@ -4280,10 +4348,11 @@ have_block_group:
goto loop;
bg_ret = NULL;
ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
ret = do_allocation(block_group, ffe_ctl, &bg_ret);
if (ret == 0) {
if (bg_ret && bg_ret != block_group) {
btrfs_release_block_group(block_group, delalloc);
btrfs_release_block_group(block_group,
ffe_ctl->delalloc);
block_group = bg_ret;
}
} else if (ret == -EAGAIN) {
@@ -4293,46 +4362,49 @@ have_block_group:
}
/* Checks */
ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
fs_info->stripesize);
ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
fs_info->stripesize);
/* move on to the next group */
if (ffe_ctl.search_start + num_bytes >
if (ffe_ctl->search_start + ffe_ctl->num_bytes >
block_group->start + block_group->length) {
btrfs_add_free_space_unused(block_group,
ffe_ctl.found_offset, num_bytes);
ffe_ctl->found_offset,
ffe_ctl->num_bytes);
goto loop;
}
if (ffe_ctl.found_offset < ffe_ctl.search_start)
if (ffe_ctl->found_offset < ffe_ctl->search_start)
btrfs_add_free_space_unused(block_group,
ffe_ctl.found_offset,
ffe_ctl.search_start - ffe_ctl.found_offset);
ffe_ctl->found_offset,
ffe_ctl->search_start - ffe_ctl->found_offset);
ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
num_bytes, delalloc);
ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
ffe_ctl->num_bytes,
ffe_ctl->delalloc);
if (ret == -EAGAIN) {
btrfs_add_free_space_unused(block_group,
ffe_ctl.found_offset, num_bytes);
ffe_ctl->found_offset,
ffe_ctl->num_bytes);
goto loop;
}
btrfs_inc_block_group_reservations(block_group);
/* we are all good, lets return */
ins->objectid = ffe_ctl.search_start;
ins->offset = num_bytes;
ins->objectid = ffe_ctl->search_start;
ins->offset = ffe_ctl->num_bytes;
trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
num_bytes);
btrfs_release_block_group(block_group, delalloc);
trace_btrfs_reserve_extent(block_group, ffe_ctl->search_start,
ffe_ctl->num_bytes);
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
break;
loop:
release_block_group(block_group, &ffe_ctl, delalloc);
release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
cond_resched();
}
up_read(&space_info->groups_sem);
ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search);
ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
if (ret > 0)
goto search;
@@ -4341,12 +4413,12 @@ loop:
* Use ffe_ctl->total_free_space as fallback if we can't find
* any contiguous hole.
*/
if (!ffe_ctl.max_extent_size)
ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
if (!ffe_ctl->max_extent_size)
ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
spin_lock(&space_info->lock);
space_info->max_extent_size = ffe_ctl.max_extent_size;
space_info->max_extent_size = ffe_ctl->max_extent_size;
spin_unlock(&space_info->lock);
ins->offset = ffe_ctl.max_extent_size;
ins->offset = ffe_ctl->max_extent_size;
} else if (ret == -ENOSPC) {
ret = cache_block_group_error;
}
@@ -4404,16 +4476,28 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
struct btrfs_key *ins, int is_data, int delalloc)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct find_free_extent_ctl ffe_ctl = {};
bool final_tried = num_bytes == min_alloc_size;
u64 flags;
int ret;
bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
flags = get_alloc_profile_by_root(root, is_data);
again:
WARN_ON(num_bytes < fs_info->sectorsize);
ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
hint_byte, ins, flags, delalloc);
ffe_ctl.ram_bytes = ram_bytes;
ffe_ctl.num_bytes = num_bytes;
ffe_ctl.min_alloc_size = min_alloc_size;
ffe_ctl.empty_size = empty_size;
ffe_ctl.flags = flags;
ffe_ctl.delalloc = delalloc;
ffe_ctl.hint_byte = hint_byte;
ffe_ctl.for_treelog = for_treelog;
ffe_ctl.for_data_reloc = for_data_reloc;
ret = find_free_extent(root, ins, &ffe_ctl);
if (!ret && !is_data) {
btrfs_dec_block_group_reservations(fs_info, ins->objectid);
} else if (ret == -ENOSPC) {
@@ -4431,8 +4515,8 @@ again:
sinfo = btrfs_find_space_info(fs_info, flags);
btrfs_err(fs_info,
"allocation failed flags %llu, wanted %llu tree-log %d",
flags, num_bytes, for_treelog);
"allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
flags, num_bytes, for_treelog, for_data_reloc);
if (sinfo)
btrfs_dump_space_info(fs_info, sinfo,
num_bytes, 1);
@@ -4543,7 +4627,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
if (ret)
return ret;
ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
ins->objectid, ins->offset);
@@ -4632,7 +4716,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
return ret;
ret = btrfs_update_block_group(trans, extent_key.objectid,
fs_info->nodesize, 1);
fs_info->nodesize, true);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
extent_key.objectid, extent_key.offset);
@@ -4655,7 +4739,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
ins->objectid, ins->offset, 0);
btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
offset, 0, false);
btrfs_ref_tree_mod(root->fs_info, &generic_ref);
return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
@@ -4847,8 +4932,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
ins.objectid, ins.offset, parent);
generic_ref.real_root = root->root_key.objectid;
btrfs_init_tree_ref(&generic_ref, level, root_objectid);
btrfs_init_tree_ref(&generic_ref, level, root_objectid,
root->root_key.objectid, false);
btrfs_ref_tree_mod(fs_info, &generic_ref);
ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
if (ret)
@@ -5265,7 +5350,8 @@ skip:
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
fs_info->nodesize, parent);
btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
0, false);
ret = btrfs_free_extent(trans, &ref);
if (ret)
goto out_unlock;
@@ -5750,13 +5836,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
btrfs_assert_tree_locked(parent);
btrfs_assert_tree_write_locked(parent);
parent_level = btrfs_header_level(parent);
atomic_inc(&parent->refs);
path->nodes[parent_level] = parent;
path->slots[parent_level] = btrfs_header_nritems(parent);
btrfs_assert_tree_locked(node);
btrfs_assert_tree_write_locked(node);
level = btrfs_header_level(node);
path->nodes[level] = node;
path->slots[level] = 0;

View File

@@ -241,7 +241,7 @@ int __init extent_io_init(void)
return -ENOMEM;
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
offsetof(struct btrfs_io_bio, bio),
offsetof(struct btrfs_bio, bio),
BIOSET_NEED_BVECS))
goto free_buffer_cache;
@@ -1975,10 +1975,18 @@ static noinline int lock_delalloc_pages(struct inode *inode,
/*
* Find and lock a contiguous range of bytes in the file marked as delalloc, no
* more than @max_bytes. @Start and @end are used to return the range,
* more than @max_bytes.
*
* Return: true if we find something
* false if nothing was in the tree
* @start: The original start bytenr to search.
* Will store the extent range start bytenr.
* @end: The original end bytenr of the search range
* Will store the extent range end bytenr.
*
* Return true if we find a delalloc range which starts inside the original
* range, and @start/@end will store the delalloc range start/end.
*
* Return false if we can't find any delalloc range which starts inside the
* original range, and @start/@end will be the non-delalloc range start/end.
*/
EXPORT_FOR_TESTS
noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
@@ -1986,6 +1994,8 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
u64 *end)
{
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
const u64 orig_start = *start;
const u64 orig_end = *end;
u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
u64 delalloc_start;
u64 delalloc_end;
@@ -1994,15 +2004,23 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
int ret;
int loops = 0;
/* Caller should pass a valid @end to indicate the search range end */
ASSERT(orig_end > orig_start);
/* The range should at least cover part of the page */
ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
orig_end <= page_offset(locked_page)));
again:
/* step one, find a bunch of delalloc bytes starting at start */
delalloc_start = *start;
delalloc_end = 0;
found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
max_bytes, &cached_state);
if (!found || delalloc_end <= *start) {
if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
*start = delalloc_start;
*end = delalloc_end;
/* @delalloc_end can be -1, never go beyond @orig_end */
*end = min(delalloc_end, orig_end);
free_extent_state(cached_state);
return false;
}
@@ -2282,15 +2300,15 @@ int free_io_failure(struct extent_io_tree *failure_tree,
* currently, there can be no more than two copies of every data bit. thus,
* exactly one rewrite is required.
*/
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num)
static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num)
{
struct bio *bio;
struct btrfs_device *dev;
u64 map_length = 0;
u64 sector;
struct btrfs_bio *bbio = NULL;
struct btrfs_io_context *bioc = NULL;
int ret;
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
@@ -2299,12 +2317,12 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
if (btrfs_is_zoned(fs_info))
return btrfs_repair_one_zone(fs_info, logical);
bio = btrfs_io_bio_alloc(1);
bio = btrfs_bio_alloc(1);
bio->bi_iter.bi_size = 0;
map_length = length;
/*
* Avoid races with device replace and make sure our bbio has devices
* Avoid races with device replace and make sure our bioc has devices
* associated to its stripes that don't go away while we are doing the
* read repair operation.
*/
@@ -2317,28 +2335,28 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
* stripe's dev and sector.
*/
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
&map_length, &bbio, 0);
&map_length, &bioc, 0);
if (ret) {
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
}
ASSERT(bbio->mirror_num == 1);
ASSERT(bioc->mirror_num == 1);
} else {
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
&map_length, &bbio, mirror_num);
&map_length, &bioc, mirror_num);
if (ret) {
btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
}
BUG_ON(mirror_num != bbio->mirror_num);
BUG_ON(mirror_num != bioc->mirror_num);
}
sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
bio->bi_iter.bi_sector = sector;
dev = bbio->stripes[bbio->mirror_num - 1].dev;
btrfs_put_bbio(bbio);
dev = bioc->stripes[bioc->mirror_num - 1].dev;
btrfs_put_bioc(bioc);
if (!dev || !dev->bdev ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
btrfs_bio_counter_dec(fs_info);
@@ -2618,10 +2636,10 @@ int btrfs_repair_one_sector(struct inode *inode,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
const int icsum = bio_offset >> fs_info->sectorsize_bits;
struct bio *repair_bio;
struct btrfs_io_bio *repair_io_bio;
struct btrfs_bio *repair_bbio;
blk_status_t status;
btrfs_debug(fs_info,
@@ -2639,24 +2657,23 @@ int btrfs_repair_one_sector(struct inode *inode,
return -EIO;
}
repair_bio = btrfs_io_bio_alloc(1);
repair_io_bio = btrfs_io_bio(repair_bio);
repair_bio = btrfs_bio_alloc(1);
repair_bbio = btrfs_bio(repair_bio);
repair_bio->bi_opf = REQ_OP_READ;
repair_bio->bi_end_io = failed_bio->bi_end_io;
repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
repair_bio->bi_private = failed_bio->bi_private;
if (failed_io_bio->csum) {
if (failed_bbio->csum) {
const u32 csum_size = fs_info->csum_size;
repair_io_bio->csum = repair_io_bio->csum_inline;
memcpy(repair_io_bio->csum,
failed_io_bio->csum + csum_size * icsum, csum_size);
repair_bbio->csum = repair_bbio->csum_inline;
memcpy(repair_bbio->csum,
failed_bbio->csum + csum_size * icsum, csum_size);
}
bio_add_page(repair_bio, page, failrec->len, pgoff);
repair_io_bio->logical = failrec->start;
repair_io_bio->iter = repair_bio->bi_iter;
repair_bbio->iter = repair_bio->bi_iter;
btrfs_debug(btrfs_sb(inode->i_sb),
"repair read error: submitting new read to mirror %d",
@@ -2976,7 +2993,7 @@ static struct extent_buffer *find_extent_buffer_readpage(
static void end_bio_extent_readpage(struct bio *bio)
{
struct bio_vec *bvec;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct btrfs_bio *bbio = btrfs_bio(bio);
struct extent_io_tree *tree, *failure_tree;
struct processed_extent processed = { 0 };
/*
@@ -3003,7 +3020,7 @@ static void end_bio_extent_readpage(struct bio *bio)
btrfs_debug(fs_info,
"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
bio->bi_iter.bi_sector, bio->bi_status,
io_bio->mirror_num);
bbio->mirror_num);
tree = &BTRFS_I(inode)->io_tree;
failure_tree = &BTRFS_I(inode)->io_failure_tree;
@@ -3028,14 +3045,14 @@ static void end_bio_extent_readpage(struct bio *bio)
end = start + bvec->bv_len - 1;
len = bvec->bv_len;
mirror = io_bio->mirror_num;
mirror = bbio->mirror_num;
if (likely(uptodate)) {
if (is_data_inode(inode)) {
error_bitmap = btrfs_verify_data_csum(io_bio,
error_bitmap = btrfs_verify_data_csum(bbio,
bio_offset, page, start, end);
ret = error_bitmap;
} else {
ret = btrfs_validate_metadata_buffer(io_bio,
ret = btrfs_validate_metadata_buffer(bbio,
page, start, end, mirror);
}
if (ret)
@@ -3106,7 +3123,7 @@ readpage_ok:
}
/* Release the last extent */
endio_readpage_release_extent(&processed, NULL, 0, 0, false);
btrfs_io_bio_free_csum(io_bio);
btrfs_bio_free_csum(bbio);
bio_put(bio);
}
@@ -3115,53 +3132,43 @@ readpage_ok:
* new bio by bio_alloc_bioset as it does not initialize the bytes outside of
* 'bio' because use of __GFP_ZERO is not supported.
*/
static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
static inline void btrfs_bio_init(struct btrfs_bio *bbio)
{
memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
}
/*
* The following helpers allocate a bio. As it's backed by a bioset, it'll
* never fail. We're returning a bio right now but you can call btrfs_io_bio
* for the appropriate container_of magic
* Allocate a btrfs_io_bio, with @nr_iovecs as maximum number of iovecs.
*
* The bio allocation is backed by bioset and does not fail.
*/
struct bio *btrfs_bio_alloc(u64 first_byte)
struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
{
struct bio *bio;
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset);
bio->bi_iter.bi_sector = first_byte >> 9;
btrfs_io_bio_init(btrfs_io_bio(bio));
ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
btrfs_bio_init(btrfs_bio(bio));
return bio;
}
struct bio *btrfs_bio_clone(struct bio *bio)
{
struct btrfs_io_bio *btrfs_bio;
struct btrfs_bio *bbio;
struct bio *new;
/* Bio allocation backed by a bioset does not fail */
new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
btrfs_bio = btrfs_io_bio(new);
btrfs_io_bio_init(btrfs_bio);
btrfs_bio->iter = bio->bi_iter;
bbio = btrfs_bio(new);
btrfs_bio_init(bbio);
bbio->iter = bio->bi_iter;
return new;
}
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
{
struct bio *bio;
/* Bio allocation backed by a bioset does not fail */
bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
}
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
{
struct bio *bio;
struct btrfs_io_bio *btrfs_bio;
struct btrfs_bio *bbio;
ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
@@ -3169,11 +3176,11 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
ASSERT(bio);
btrfs_bio = btrfs_io_bio(bio);
btrfs_io_bio_init(btrfs_bio);
bbio = btrfs_bio(bio);
btrfs_bio_init(bbio);
bio_trim(bio, offset >> 9, size >> 9);
btrfs_bio->iter = bio->bi_iter;
bbio->iter = bio->bi_iter;
return bio;
}
@@ -3307,14 +3314,15 @@ static int alloc_new_bio(struct btrfs_inode *inode,
struct bio *bio;
int ret;
bio = btrfs_bio_alloc(BIO_MAX_VECS);
/*
* For compressed page range, its disk_bytenr is always @disk_bytenr
* passed in, no matter if we have added any range into previous bio.
*/
if (bio_flags & EXTENT_BIO_COMPRESSED)
bio = btrfs_bio_alloc(disk_bytenr);
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
else
bio = btrfs_bio_alloc(disk_bytenr + offset);
bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
bio_ctrl->bio = bio;
bio_ctrl->bio_flags = bio_flags;
bio->bi_end_io = end_io_func;
@@ -3327,7 +3335,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
if (wbc) {
struct block_device *bdev;
bdev = fs_info->fs_devices->latest_bdev;
bdev = fs_info->fs_devices->latest_dev->bdev;
bio_set_dev(bio, bdev);
wbc_init_bio(wbc, bio);
}
@@ -3341,7 +3349,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
goto error;
}
btrfs_io_bio(bio)->device = device;
btrfs_bio(bio)->device = device;
}
return 0;
error:
@@ -3599,6 +3607,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
bool force_bio_submit = false;
u64 disk_bytenr;
ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
if (cur >= last_byte) {
struct extent_state *cached = NULL;
@@ -3777,17 +3786,18 @@ static void update_nr_written(struct writeback_control *wbc,
*/
static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
struct page *page, struct writeback_control *wbc,
u64 delalloc_start, unsigned long *nr_written)
unsigned long *nr_written)
{
u64 page_end = delalloc_start + PAGE_SIZE - 1;
bool found;
const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
u64 delalloc_start = page_offset(page);
u64 delalloc_to_write = 0;
u64 delalloc_end = 0;
int ret;
int page_started = 0;
while (delalloc_start < page_end) {
u64 delalloc_end = page_end;
bool found;
while (delalloc_end < page_end) {
found = find_lock_delalloc_range(&inode->vfs_inode, page,
&delalloc_start,
&delalloc_end);
@@ -3854,12 +3864,11 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
struct page *page, u64 *start, u64 *end)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
struct btrfs_subpage_info *spi = fs_info->subpage_info;
u64 orig_start = *start;
/* Declare as unsigned long so we can use bitmap ops */
unsigned long dirty_bitmap;
unsigned long flags;
int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
int range_start_bit = nbits;
int range_start_bit;
int range_end_bit;
/*
@@ -3872,13 +3881,18 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
return;
}
range_start_bit = spi->dirty_offset +
(offset_in_page(orig_start) >> fs_info->sectorsize_bits);
/* We should have the page locked, but just in case */
spin_lock_irqsave(&subpage->lock, flags);
dirty_bitmap = subpage->dirty_bitmap;
bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
spi->dirty_offset + spi->bitmap_nr_bits);
spin_unlock_irqrestore(&subpage->lock, flags);
bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
BTRFS_SUBPAGE_BITMAP_SIZE);
range_start_bit -= spi->dirty_offset;
range_end_bit -= spi->dirty_offset;
*start = page_offset(page) + range_start_bit * fs_info->sectorsize;
*end = page_offset(page) + range_end_bit * fs_info->sectorsize;
}
@@ -4054,8 +4068,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
struct extent_page_data *epd)
{
struct inode *inode = page->mapping->host;
u64 start = page_offset(page);
u64 page_end = start + PAGE_SIZE - 1;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const u64 page_start = page_offset(page);
const u64 page_end = page_start + PAGE_SIZE - 1;
int ret;
int nr = 0;
size_t pg_offset;
@@ -4090,8 +4105,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
}
if (!epd->extent_locked) {
ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
&nr_written);
ret = writepage_delalloc(BTRFS_I(inode), page, wbc, &nr_written);
if (ret == 1)
return 0;
if (ret)
@@ -4141,8 +4155,20 @@ done:
* capable of that.
*/
if (PageError(page))
end_extent_writepage(page, ret, start, page_end);
unlock_page(page);
end_extent_writepage(page, ret, page_start, page_end);
if (epd->extent_locked) {
/*
* If epd->extent_locked, it's from extent_write_locked_range(),
* the page can either be locked by lock_page() or
* process_one_page().
* Let btrfs_page_unlock_writer() handle both cases.
*/
ASSERT(wbc);
btrfs_page_unlock_writer(fs_info, page, wbc->range_start,
wbc->range_end + 1 - wbc->range_start);
} else {
unlock_page(page);
}
ASSERT(ret <= 0);
return ret;
}
@@ -4155,6 +4181,9 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
static void end_extent_buffer_writeback(struct extent_buffer *eb)
{
if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
smp_mb__after_atomic();
wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4602,12 +4631,11 @@ static int submit_eb_subpage(struct page *page,
int submitted = 0;
u64 page_start = page_offset(page);
int bit_start = 0;
const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
int ret;
/* Lock and write each dirty extent buffers in the range */
while (bit_start < nbits) {
while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
struct extent_buffer *eb;
unsigned long flags;
@@ -4623,7 +4651,8 @@ static int submit_eb_subpage(struct page *page,
break;
}
spin_lock_irqsave(&subpage->lock, flags);
if (!((1 << bit_start) & subpage->dirty_bitmap)) {
if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
subpage->bitmaps)) {
spin_unlock_irqrestore(&subpage->lock, flags);
spin_unlock(&page->mapping->private_lock);
bit_start++;
@@ -4756,8 +4785,13 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
free_extent_buffer(eb);
return ret;
}
if (cache)
if (cache) {
/* Impiles write in zoned mode */
btrfs_put_block_group(cache);
/* Mark the last eb in a block group */
if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
}
ret = write_one_eb(eb, wbc, epd);
free_extent_buffer(eb);
if (ret < 0)
@@ -4873,7 +4907,7 @@ retry:
* extent io tree. Thus we don't want to submit such wild eb
* if the fs already has error.
*/
if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
if (!BTRFS_FS_ERROR(fs_info)) {
ret = flush_write_bio(&epd);
} else {
ret = -EROFS;
@@ -5069,23 +5103,28 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
return ret;
}
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
int mode)
/*
* Submit the pages in the range to bio for call sites which delalloc range has
* already been ran (aka, ordered extent inserted) and all pages are still
* locked.
*/
int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
{
bool found_error = false;
int first_error = 0;
int ret = 0;
struct address_space *mapping = inode->i_mapping;
struct page *page;
unsigned long nr_pages = (end - start + PAGE_SIZE) >>
PAGE_SHIFT;
u64 cur = start;
unsigned long nr_pages;
const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
struct extent_page_data epd = {
.bio_ctrl = { 0 },
.extent_locked = 1,
.sync_io = mode == WB_SYNC_ALL,
.sync_io = 1,
};
struct writeback_control wbc_writepages = {
.sync_mode = mode,
.nr_to_write = nr_pages * 2,
.sync_mode = WB_SYNC_ALL,
.range_start = start,
.range_end = end + 1,
/* We're called from an async helper function */
@@ -5093,33 +5132,51 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
.no_cgroup_owner = 1,
};
ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
PAGE_SHIFT;
wbc_writepages.nr_to_write = nr_pages * 2;
wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
while (start <= end) {
page = find_get_page(mapping, start >> PAGE_SHIFT);
if (clear_page_dirty_for_io(page))
ret = __extent_writepage(page, &wbc_writepages, &epd);
else {
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
page, start, start + PAGE_SIZE - 1, true);
unlock_page(page);
while (cur <= end) {
u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
page = find_get_page(mapping, cur >> PAGE_SHIFT);
/*
* All pages in the range are locked since
* btrfs_run_delalloc_range(), thus there is no way to clear
* the page dirty flag.
*/
ASSERT(PageLocked(page));
ASSERT(PageDirty(page));
clear_page_dirty_for_io(page);
ret = __extent_writepage(page, &wbc_writepages, &epd);
ASSERT(ret <= 0);
if (ret < 0) {
found_error = true;
first_error = ret;
}
put_page(page);
start += PAGE_SIZE;
cur = cur_end + 1;
}
ASSERT(ret <= 0);
if (ret == 0)
if (!found_error)
ret = flush_write_bio(&epd);
else
end_write_bio(&epd, ret);
wbc_detach_inode(&wbc_writepages);
if (found_error)
return first_error;
return ret;
}
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
const bool data_reloc = btrfs_is_data_reloc_root(BTRFS_I(inode)->root);
const bool zoned = btrfs_is_zoned(BTRFS_I(inode)->root->fs_info);
int ret = 0;
struct extent_page_data epd = {
.bio_ctrl = { 0 },
@@ -5127,7 +5184,15 @@ int extent_writepages(struct address_space *mapping,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
/*
* Allow only a single thread to do the reloc work in zoned mode to
* protect the write pointer updates.
*/
if (data_reloc && zoned)
btrfs_inode_lock(inode, 0);
ret = extent_write_cache_pages(mapping, wbc, &epd);
if (data_reloc && zoned)
btrfs_inode_unlock(inode, 0);
ASSERT(ret <= 0);
if (ret < 0) {
end_write_bio(&epd, ret);
@@ -6137,13 +6202,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
* page, but it may change in the future for 16K page size
* support, so we still preallocate the memory in the loop.
*/
ret = btrfs_alloc_subpage(fs_info, &prealloc,
BTRFS_SUBPAGE_METADATA);
if (ret < 0) {
unlock_page(p);
put_page(p);
exists = ERR_PTR(ret);
goto free_eb;
if (fs_info->sectorsize < PAGE_SIZE) {
prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
if (IS_ERR(prealloc)) {
ret = PTR_ERR(prealloc);
unlock_page(p);
put_page(p);
exists = ERR_PTR(ret);
goto free_eb;
}
}
spin_lock(&mapping->private_lock);
@@ -7167,32 +7234,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
}
}
#define GANG_LOOKUP_SIZE 16
static struct extent_buffer *get_next_extent_buffer(
struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
{
struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
struct extent_buffer *gang[GANG_LOOKUP_SIZE];
struct extent_buffer *found = NULL;
u64 page_start = page_offset(page);
int ret;
int i;
u64 cur = page_start;
ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
lockdep_assert_held(&fs_info->buffer_lock);
ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
bytenr >> fs_info->sectorsize_bits,
PAGE_SIZE / fs_info->nodesize);
for (i = 0; i < ret; i++) {
/* Already beyond page end */
if (gang[i]->start >= page_start + PAGE_SIZE)
break;
/* Found one */
if (gang[i]->start >= bytenr) {
found = gang[i];
break;
while (cur < page_start + PAGE_SIZE) {
int ret;
int i;
ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
(void **)gang, cur >> fs_info->sectorsize_bits,
min_t(unsigned int, GANG_LOOKUP_SIZE,
PAGE_SIZE / fs_info->nodesize));
if (ret == 0)
goto out;
for (i = 0; i < ret; i++) {
/* Already beyond page end */
if (gang[i]->start >= page_start + PAGE_SIZE)
goto out;
/* Found one */
if (gang[i]->start >= bytenr) {
found = gang[i];
goto out;
}
}
cur = gang[ret - 1]->start + gang[ret - 1]->len;
}
out:
return found;
}

View File

@@ -32,6 +32,7 @@ enum {
/* write IO error */
EXTENT_BUFFER_WRITE_ERR,
EXTENT_BUFFER_NO_CHECK,
EXTENT_BUFFER_ZONE_FINISH,
};
/* these are flags for __process_pages_contig */
@@ -183,8 +184,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
struct btrfs_bio_ctrl *bio_ctrl,
unsigned int read_flags, u64 *prev_em_start);
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
int mode);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end);
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
@@ -277,14 +277,10 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
u32 bits_to_clear, unsigned long page_ops);
struct bio *btrfs_bio_alloc(u64 first_byte);
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio);
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);

View File

@@ -360,7 +360,7 @@ static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
int i;
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_bio_stripe *stripe = &map->stripes[i];
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
set_extent_bits_nowait(&device->alloc_state, stripe->physical,
@@ -375,7 +375,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
int i;
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_bio_stripe *stripe = &map->stripes[i];
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
__clear_extent_bit(&device->alloc_state, stripe->physical,

View File

@@ -358,7 +358,7 @@ static int search_file_offset_in_bio(struct bio *bio, struct inode *inode,
* @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
* checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
* NULL, the checksum buffer is allocated and returned in
* btrfs_io_bio(bio)->csum instead.
* btrfs_bio(bio)->csum instead.
*
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
*/
@@ -397,19 +397,18 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
return BLK_STS_RESOURCE;
if (!dst) {
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
struct btrfs_bio *bbio = btrfs_bio(bio);
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
btrfs_bio->csum = kmalloc_array(nblocks, csum_size,
GFP_NOFS);
if (!btrfs_bio->csum) {
bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
if (!bbio->csum) {
btrfs_free_path(path);
return BLK_STS_RESOURCE;
}
} else {
btrfs_bio->csum = btrfs_bio->csum_inline;
bbio->csum = bbio->csum_inline;
}
csum = btrfs_bio->csum;
csum = bbio->csum;
} else {
csum = dst;
}
@@ -709,12 +708,12 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
index = 0;
}
data = kmap_atomic(bvec.bv_page);
crypto_shash_digest(shash, data + bvec.bv_offset
+ (i * fs_info->sectorsize),
data = bvec_kmap_local(&bvec);
crypto_shash_digest(shash,
data + (i * fs_info->sectorsize),
fs_info->sectorsize,
sums->sums + index);
kunmap_atomic(data);
kunmap_local(data);
index += fs_info->csum_size;
offset += fs_info->sectorsize;
this_sum_bytes += fs_info->sectorsize;

View File

@@ -437,9 +437,15 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
/*
* unlocks pages after btrfs_file_write is done with them
*/
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
static void btrfs_drop_pages(struct btrfs_fs_info *fs_info,
struct page **pages, size_t num_pages,
u64 pos, u64 copied)
{
size_t i;
u64 block_start = round_down(pos, fs_info->sectorsize);
u64 block_len = round_up(pos + copied, fs_info->sectorsize) - block_start;
ASSERT(block_len <= U32_MAX);
for (i = 0; i < num_pages; i++) {
/* page checked is some magic around finding pages that
* have been modified without going through btrfs_set_page_dirty
@@ -447,7 +453,8 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
* accessed as prepare_pages should have marked them accessed
* in prepare_pages via find_or_create_page()
*/
ClearPageChecked(pages[i]);
btrfs_page_clamp_clear_checked(fs_info, pages[i], block_start,
block_len);
unlock_page(pages[i]);
put_page(pages[i]);
}
@@ -504,7 +511,7 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
struct page *p = pages[i];
btrfs_page_clamp_set_uptodate(fs_info, p, start_pos, num_bytes);
ClearPageChecked(p);
btrfs_page_clamp_clear_checked(fs_info, p, start_pos, num_bytes);
btrfs_page_clamp_set_dirty(fs_info, p, start_pos, num_bytes);
}
@@ -869,7 +876,8 @@ next_slot:
btrfs_init_data_ref(&ref,
root->root_key.objectid,
new_key.objectid,
args->start - extent_offset);
args->start - extent_offset,
0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
BUG_ON(ret); /* -ENOMEM */
}
@@ -955,7 +963,8 @@ delete_extent_item:
btrfs_init_data_ref(&ref,
root->root_key.objectid,
key.objectid,
key.offset - extent_offset);
key.offset - extent_offset, 0,
false);
ret = btrfs_free_extent(trans, &ref);
BUG_ON(ret); /* -ENOMEM */
args->bytes_found += extent_end - key.offset;
@@ -1020,8 +1029,7 @@ delete_extent_item:
if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
path->slots[0]++;
}
setup_items_for_insert(root, path, &key,
&args->extent_item_size, 1);
btrfs_setup_item_for_insert(root, path, &key, args->extent_item_size);
args->extent_inserted = true;
}
@@ -1232,7 +1240,7 @@ again:
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
num_bytes, 0);
btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
orig_offset);
orig_offset, 0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1257,7 +1265,8 @@ again:
other_end = 0;
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
num_bytes, 0);
btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset);
btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset,
0, false);
if (extent_mergeable(leaf, path->slots[0] + 1,
ino, bytenr, orig_offset,
&other_start, &other_end)) {
@@ -1844,7 +1853,7 @@ again:
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) {
btrfs_drop_pages(pages, num_pages);
btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
break;
}
@@ -1852,7 +1861,7 @@ again:
if (only_release_metadata)
btrfs_check_nocow_unlock(BTRFS_I(inode));
btrfs_drop_pages(pages, num_pages);
btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
cond_resched();
@@ -2012,7 +2021,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
* have opened a file as writable, we have to stop this write operation
* to ensure consistency.
*/
if (test_bit(BTRFS_FS_STATE_ERROR, &inode->root->fs_info->fs_state))
if (BTRFS_FS_ERROR(inode->root->fs_info))
return -EROFS;
if (!(iocb->ki_flags & IOCB_DIRECT) &&
@@ -2620,7 +2629,7 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
extent_info->disk_len, 0);
ref_offset = extent_info->file_offset - extent_info->data_offset;
btrfs_init_data_ref(&ref, root->root_key.objectid,
btrfs_ino(inode), ref_offset);
btrfs_ino(inode), ref_offset, 0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
}

View File

@@ -22,6 +22,7 @@
#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"
#include "subpage.h"
#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG SZ_64K
@@ -411,7 +412,10 @@ static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
for (i = 0; i < io_ctl->num_pages; i++) {
if (io_ctl->pages[i]) {
ClearPageChecked(io_ctl->pages[i]);
btrfs_page_clear_checked(io_ctl->fs_info,
io_ctl->pages[i],
page_offset(io_ctl->pages[i]),
PAGE_SIZE);
unlock_page(io_ctl->pages[i]);
put_page(io_ctl->pages[i]);
}
@@ -2539,10 +2543,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
u64 offset = bytenr - block_group->start;
u64 to_free, to_unusable;
const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold);
bool initial = (size == block_group->length);
u64 reclaimable_unusable;
WARN_ON(!initial && offset + size > block_group->zone_capacity);
spin_lock(&ctl->tree_lock);
if (!used)
to_free = size;
else if (initial)
to_free = block_group->zone_capacity;
else if (offset >= block_group->alloc_offset)
to_free = size;
else if (offset + size <= block_group->alloc_offset)
@@ -2565,12 +2575,15 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
spin_unlock(&block_group->lock);
}
reclaimable_unusable = block_group->zone_unusable -
(block_group->length - block_group->zone_capacity);
/* All the region is now unusable. Mark it as unused and reclaim */
if (block_group->zone_unusable == block_group->length) {
btrfs_mark_bg_unused(block_group);
} else if (bg_reclaim_threshold &&
block_group->zone_unusable >=
div_factor_fine(block_group->length, bg_reclaim_threshold)) {
reclaimable_unusable >=
div_factor_fine(block_group->zone_capacity,
bg_reclaim_threshold)) {
btrfs_mark_bg_to_reclaim(block_group);
}
@@ -2754,8 +2767,9 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
* out the free space after the allocation offset.
*/
if (btrfs_is_zoned(fs_info)) {
btrfs_info(fs_info, "free space %llu",
block_group->length - block_group->alloc_offset);
btrfs_info(fs_info, "free space %llu active %d",
block_group->zone_capacity - block_group->alloc_offset,
block_group->zone_is_active);
return;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -96,11 +96,12 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
#ifdef CONFIG_BTRFS_DEBUG
static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) {
lockdep_assert_held(&eb->lock);
static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb)
{
lockdep_assert_held_write(&eb->lock);
}
#else
static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb) { }
#endif
void btrfs_unlock_up_safe(struct btrfs_path *path, int level);

View File

@@ -32,19 +32,19 @@
* payload.
* One regular LZO compressed extent can have one or more segments.
* For inlined LZO compressed extent, only one segment is allowed.
* One segment represents at most one page of uncompressed data.
* One segment represents at most one sector of uncompressed data.
*
* 2.1 Segment header
* Fixed size. LZO_LEN (4) bytes long, LE32.
* Records the total size of the segment (not including the header).
* Segment header never crosses page boundary, thus it's possible to
* have at most 3 padding zeros at the end of the page.
* Segment header never crosses sector boundary, thus it's possible to
* have at most 3 padding zeros at the end of the sector.
*
* 2.2 Data Payload
* Variable size. Size up limit should be lzo1x_worst_compress(PAGE_SIZE)
* which is 4419 for a 4KiB page.
* Variable size. Size up limit should be lzo1x_worst_compress(sectorsize)
* which is 4419 for a 4KiB sectorsize.
*
* Example:
* Example with 4K sectorsize:
* Page 1:
* 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10
* 0x0000 | Header | SegHdr 01 | Data payload 01 ... |
@@ -112,170 +112,174 @@ static inline size_t read_compress_length(const char *buf)
return le32_to_cpu(dlen);
}
/*
* Will do:
*
* - Write a segment header into the destination
* - Copy the compressed buffer into the destination
* - Make sure we have enough space in the last sector to fit a segment header
* If not, we will pad at most (LZO_LEN (4)) - 1 bytes of zeros.
*
* Will allocate new pages when needed.
*/
static int copy_compressed_data_to_page(char *compressed_data,
size_t compressed_size,
struct page **out_pages,
u32 *cur_out,
const u32 sectorsize)
{
u32 sector_bytes_left;
u32 orig_out;
struct page *cur_page;
char *kaddr;
/*
* We never allow a segment header crossing sector boundary, previous
* run should ensure we have enough space left inside the sector.
*/
ASSERT((*cur_out / sectorsize) == (*cur_out + LZO_LEN - 1) / sectorsize);
cur_page = out_pages[*cur_out / PAGE_SIZE];
/* Allocate a new page */
if (!cur_page) {
cur_page = alloc_page(GFP_NOFS);
if (!cur_page)
return -ENOMEM;
out_pages[*cur_out / PAGE_SIZE] = cur_page;
}
kaddr = kmap(cur_page);
write_compress_length(kaddr + offset_in_page(*cur_out),
compressed_size);
*cur_out += LZO_LEN;
orig_out = *cur_out;
/* Copy compressed data */
while (*cur_out - orig_out < compressed_size) {
u32 copy_len = min_t(u32, sectorsize - *cur_out % sectorsize,
orig_out + compressed_size - *cur_out);
kunmap(cur_page);
cur_page = out_pages[*cur_out / PAGE_SIZE];
/* Allocate a new page */
if (!cur_page) {
cur_page = alloc_page(GFP_NOFS);
if (!cur_page)
return -ENOMEM;
out_pages[*cur_out / PAGE_SIZE] = cur_page;
}
kaddr = kmap(cur_page);
memcpy(kaddr + offset_in_page(*cur_out),
compressed_data + *cur_out - orig_out, copy_len);
*cur_out += copy_len;
}
/*
* Check if we can fit the next segment header into the remaining space
* of the sector.
*/
sector_bytes_left = round_up(*cur_out, sectorsize) - *cur_out;
if (sector_bytes_left >= LZO_LEN || sector_bytes_left == 0)
goto out;
/* The remaining size is not enough, pad it with zeros */
memset(kaddr + offset_in_page(*cur_out), 0,
sector_bytes_left);
*cur_out += sector_bytes_left;
out:
kunmap(cur_page);
return 0;
}
int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
u64 start, struct page **pages, unsigned long *out_pages,
unsigned long *total_in, unsigned long *total_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
const u32 sectorsize = btrfs_sb(mapping->host->i_sb)->sectorsize;
struct page *page_in = NULL;
char *sizes_ptr;
int ret = 0;
char *data_in;
char *cpage_out, *sizes_ptr;
int nr_pages = 0;
struct page *in_page = NULL;
struct page *out_page = NULL;
unsigned long bytes_left;
unsigned long len = *total_out;
unsigned long nr_dest_pages = *out_pages;
const unsigned long max_out = nr_dest_pages * PAGE_SIZE;
size_t in_len;
size_t out_len;
char *buf;
unsigned long tot_in = 0;
unsigned long tot_out = 0;
unsigned long pg_bytes_left;
unsigned long out_offset;
unsigned long bytes;
/* Points to the file offset of input data */
u64 cur_in = start;
/* Points to the current output byte */
u32 cur_out = 0;
u32 len = *total_out;
*out_pages = 0;
*total_out = 0;
*total_in = 0;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
data_in = kmap(in_page);
/*
* store the size of all chunks of compressed data in
* the first 4 bytes
* Skip the header for now, we will later come back and write the total
* compressed size
*/
out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
cpage_out = kmap(out_page);
out_offset = LZO_LEN;
tot_out = LZO_LEN;
pages[0] = out_page;
nr_pages = 1;
pg_bytes_left = PAGE_SIZE - LZO_LEN;
cur_out += LZO_LEN;
while (cur_in < start + len) {
char *data_in;
const u32 sectorsize_mask = sectorsize - 1;
u32 sector_off = (cur_in - start) & sectorsize_mask;
u32 in_len;
size_t out_len;
/* compress at most one page of data each time */
in_len = min(len, PAGE_SIZE);
while (tot_in < len) {
ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
&out_len, workspace->mem);
if (ret != LZO_E_OK) {
pr_debug("BTRFS: lzo in loop returned %d\n",
ret);
/* Get the input page first */
if (!page_in) {
page_in = find_get_page(mapping, cur_in >> PAGE_SHIFT);
ASSERT(page_in);
}
/* Compress at most one sector of data each time */
in_len = min_t(u32, start + len - cur_in, sectorsize - sector_off);
ASSERT(in_len);
data_in = kmap(page_in);
ret = lzo1x_1_compress(data_in +
offset_in_page(cur_in), in_len,
workspace->cbuf, &out_len,
workspace->mem);
kunmap(page_in);
if (ret < 0) {
pr_debug("BTRFS: lzo in loop returned %d\n", ret);
ret = -EIO;
goto out;
}
/* store the size of this chunk of compressed data */
write_compress_length(cpage_out + out_offset, out_len);
tot_out += LZO_LEN;
out_offset += LZO_LEN;
pg_bytes_left -= LZO_LEN;
ret = copy_compressed_data_to_page(workspace->cbuf, out_len,
pages, &cur_out, sectorsize);
if (ret < 0)
goto out;
tot_in += in_len;
tot_out += out_len;
cur_in += in_len;
/* copy bytes from the working buffer into the pages */
buf = workspace->cbuf;
while (out_len) {
bytes = min_t(unsigned long, pg_bytes_left, out_len);
memcpy(cpage_out + out_offset, buf, bytes);
out_len -= bytes;
pg_bytes_left -= bytes;
buf += bytes;
out_offset += bytes;
/*
* we need another page for writing out.
*
* Note if there's less than 4 bytes left, we just
* skip to a new page.
*/
if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
pg_bytes_left == 0) {
if (pg_bytes_left) {
memset(cpage_out + out_offset, 0,
pg_bytes_left);
tot_out += pg_bytes_left;
}
/* we're done, don't allocate new page */
if (out_len == 0 && tot_in >= len)
break;
kunmap(out_page);
if (nr_pages == nr_dest_pages) {
out_page = NULL;
ret = -E2BIG;
goto out;
}
out_page = alloc_page(GFP_NOFS);
if (out_page == NULL) {
ret = -ENOMEM;
goto out;
}
cpage_out = kmap(out_page);
pages[nr_pages++] = out_page;
pg_bytes_left = PAGE_SIZE;
out_offset = 0;
}
}
/* we're making it bigger, give up */
if (tot_in > 8192 && tot_in < tot_out) {
/*
* Check if we're making it bigger after two sectors. And if
* it is so, give up.
*/
if (cur_in - start > sectorsize * 2 && cur_in - start < cur_out) {
ret = -E2BIG;
goto out;
}
/* we're all done */
if (tot_in >= len)
break;
if (tot_out > max_out)
break;
bytes_left = len - tot_in;
kunmap(in_page);
put_page(in_page);
start += PAGE_SIZE;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
data_in = kmap(in_page);
in_len = min(bytes_left, PAGE_SIZE);
/* Check if we have reached page boundary */
if (IS_ALIGNED(cur_in, PAGE_SIZE)) {
put_page(page_in);
page_in = NULL;
}
}
if (tot_out >= tot_in) {
ret = -E2BIG;
goto out;
}
/* store the size of all chunks of compressed data */
/* Store the size of all chunks of compressed data */
sizes_ptr = kmap_local_page(pages[0]);
write_compress_length(sizes_ptr, tot_out);
write_compress_length(sizes_ptr, cur_out);
kunmap_local(sizes_ptr);
ret = 0;
*total_out = tot_out;
*total_in = tot_in;
*total_out = cur_out;
*total_in = cur_in - start;
out:
*out_pages = nr_pages;
if (out_page)
kunmap(out_page);
if (in_page) {
kunmap(in_page);
put_page(in_page);
}
*out_pages = DIV_ROUND_UP(cur_out, PAGE_SIZE);
return ret;
}
@@ -357,9 +361,10 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
ASSERT(cur_in / sectorsize ==
(cur_in + LZO_LEN - 1) / sectorsize);
cur_page = cb->compressed_pages[cur_in / PAGE_SIZE];
kaddr = kmap(cur_page);
ASSERT(cur_page);
kaddr = kmap(cur_page);
seg_len = read_compress_length(kaddr + offset_in_page(cur_in));
kunmap(cur_page);
cur_in += LZO_LEN;
/* Copy the compressed segment payload into workspace */

View File

@@ -60,8 +60,7 @@ enum btrfs_rbio_ops {
};
struct btrfs_raid_bio {
struct btrfs_fs_info *fs_info;
struct btrfs_bio *bbio;
struct btrfs_io_context *bioc;
/* while we're doing rmw on a stripe
* we put it into a hash table so we can
@@ -192,7 +191,7 @@ static void scrub_parity_work(struct btrfs_work *work);
static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
{
btrfs_init_work(&rbio->work, work_func, NULL, NULL);
btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
btrfs_queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
}
/*
@@ -271,7 +270,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
*/
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
u64 num = rbio->bbio->raid_map[0];
u64 num = rbio->bioc->raid_map[0];
/*
* we shift down quite a bit. We're using byte
@@ -345,7 +344,7 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
return;
table = rbio->fs_info->stripe_hash_table;
table = rbio->bioc->fs_info->stripe_hash_table;
h = table->table + bucket;
/* hold the lock for the bucket because we may be
@@ -400,7 +399,7 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
return;
table = rbio->fs_info->stripe_hash_table;
table = rbio->bioc->fs_info->stripe_hash_table;
spin_lock_irqsave(&table->cache_lock, flags);
__remove_rbio_from_cache(rbio);
@@ -460,7 +459,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
return;
table = rbio->fs_info->stripe_hash_table;
table = rbio->bioc->fs_info->stripe_hash_table;
spin_lock_irqsave(&table->cache_lock, flags);
spin_lock(&rbio->bio_list_lock);
@@ -559,8 +558,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
test_bit(RBIO_CACHE_BIT, &cur->flags))
return 0;
if (last->bbio->raid_map[0] !=
cur->bbio->raid_map[0])
if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
return 0;
/* we can't merge with different operations */
@@ -669,11 +667,11 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
struct btrfs_raid_bio *cache_drop = NULL;
int ret = 0;
h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
spin_lock_irqsave(&h->lock, flags);
list_for_each_entry(cur, &h->hash_list, hash_list) {
if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0])
if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
continue;
spin_lock(&cur->bio_list_lock);
@@ -751,7 +749,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
int keep_cache = 0;
bucket = rbio_bucket(rbio);
h = rbio->fs_info->stripe_hash_table->table + bucket;
h = rbio->bioc->fs_info->stripe_hash_table->table + bucket;
if (list_empty(&rbio->plug_list))
cache_rbio(rbio);
@@ -838,7 +836,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
}
}
btrfs_put_bbio(rbio->bbio);
btrfs_put_bioc(rbio->bioc);
kfree(rbio);
}
@@ -865,7 +863,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
struct bio *extra;
if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);
/*
* At this moment, rbio->bio_list is empty, however since rbio does not
@@ -906,7 +904,7 @@ static void raid_write_end_io(struct bio *bio)
/* OK, we have read all the stripes we need to. */
max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
0 : rbio->bbio->max_errors;
0 : rbio->bioc->max_errors;
if (atomic_read(&rbio->error) > max_errors)
err = BLK_STS_IOERR;
@@ -961,12 +959,12 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
* this does not allocate any pages for rbio->pages.
*/
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
struct btrfs_bio *bbio,
struct btrfs_io_context *bioc,
u64 stripe_len)
{
struct btrfs_raid_bio *rbio;
int nr_data = 0;
int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
int num_pages = rbio_nr_pages(stripe_len, real_stripes);
int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
void *p;
@@ -987,8 +985,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
spin_lock_init(&rbio->bio_list_lock);
INIT_LIST_HEAD(&rbio->stripe_cache);
INIT_LIST_HEAD(&rbio->hash_list);
rbio->bbio = bbio;
rbio->fs_info = fs_info;
rbio->bioc = bioc;
rbio->stripe_len = stripe_len;
rbio->nr_pages = num_pages;
rbio->real_stripes = real_stripes;
@@ -1015,9 +1012,9 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
#undef CONSUME_ALLOC
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
nr_data = real_stripes - 1;
else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
nr_data = real_stripes - 2;
else
BUG();
@@ -1077,10 +1074,10 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
struct bio *last = bio_list->tail;
int ret;
struct bio *bio;
struct btrfs_bio_stripe *stripe;
struct btrfs_io_stripe *stripe;
u64 disk_start;
stripe = &rbio->bbio->stripes[stripe_nr];
stripe = &rbio->bioc->stripes[stripe_nr];
disk_start = stripe->physical + (page_index << PAGE_SHIFT);
/* if the device is missing, just fail this stripe */
@@ -1105,8 +1102,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
}
/* put a new bio on the list */
bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
btrfs_io_bio(bio)->device = stripe->dev;
bio = btrfs_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
btrfs_bio(bio)->device = stripe->dev;
bio->bi_iter.bi_size = 0;
bio_set_dev(bio, stripe->dev->bdev);
bio->bi_iter.bi_sector = disk_start >> 9;
@@ -1155,11 +1152,11 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
int i = 0;
start = bio->bi_iter.bi_sector << 9;
stripe_offset = start - rbio->bbio->raid_map[0];
stripe_offset = start - rbio->bioc->raid_map[0];
page_index = stripe_offset >> PAGE_SHIFT;
if (bio_flagged(bio, BIO_CLONED))
bio->bi_iter = btrfs_io_bio(bio)->iter;
bio->bi_iter = btrfs_bio(bio)->iter;
bio_for_each_segment(bvec, bio, iter) {
rbio->bio_pages[page_index + i] = bvec.bv_page;
@@ -1179,7 +1176,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
*/
static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
{
struct btrfs_bio *bbio = rbio->bbio;
struct btrfs_io_context *bioc = rbio->bioc;
void **pointers = rbio->finish_pointers;
int nr_data = rbio->nr_data;
int stripe;
@@ -1284,11 +1281,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
}
}
if (likely(!bbio->num_tgtdevs))
if (likely(!bioc->num_tgtdevs))
goto write_data;
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
if (!bbio->tgtdev_map[stripe])
if (!bioc->tgtdev_map[stripe])
continue;
for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
@@ -1302,7 +1299,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
}
ret = rbio_add_io_page(rbio, &bio_list, page,
rbio->bbio->tgtdev_map[stripe],
rbio->bioc->tgtdev_map[stripe],
pagenr, rbio->stripe_len);
if (ret)
goto cleanup;
@@ -1339,12 +1336,12 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
{
u64 physical = bio->bi_iter.bi_sector;
int i;
struct btrfs_bio_stripe *stripe;
struct btrfs_io_stripe *stripe;
physical <<= 9;
for (i = 0; i < rbio->bbio->num_stripes; i++) {
stripe = &rbio->bbio->stripes[i];
for (i = 0; i < rbio->bioc->num_stripes; i++) {
stripe = &rbio->bioc->stripes[i];
if (in_range(physical, stripe->physical, rbio->stripe_len) &&
stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) {
return i;
@@ -1365,7 +1362,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
int i;
for (i = 0; i < rbio->nr_data; i++) {
u64 stripe_start = rbio->bbio->raid_map[i];
u64 stripe_start = rbio->bioc->raid_map[i];
if (in_range(logical, stripe_start, rbio->stripe_len))
return i;
@@ -1456,7 +1453,7 @@ static void raid_rmw_end_io(struct bio *bio)
if (!atomic_dec_and_test(&rbio->stripes_pending))
return;
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
goto cleanup;
/*
@@ -1538,8 +1535,8 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
}
/*
* the bbio may be freed once we submit the last bio. Make sure
* not to touch it after that
* The bioc may be freed once we submit the last bio. Make sure not to
* touch it after that.
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
while ((bio = bio_list_pop(&bio_list))) {
@@ -1547,7 +1544,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
bio->bi_end_io = raid_rmw_end_io;
bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
submit_bio(bio);
}
@@ -1719,17 +1716,18 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
/*
* our main entry point for writes from the rest of the FS.
*/
int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
struct btrfs_bio *bbio, u64 stripe_len)
int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc,
u64 stripe_len)
{
struct btrfs_fs_info *fs_info = bioc->fs_info;
struct btrfs_raid_bio *rbio;
struct btrfs_plug_cb *plug = NULL;
struct blk_plug_cb *cb;
int ret;
rbio = alloc_rbio(fs_info, bbio, stripe_len);
rbio = alloc_rbio(fs_info, bioc, stripe_len);
if (IS_ERR(rbio)) {
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
return PTR_ERR(rbio);
}
bio_list_add(&rbio->bio_list, bio);
@@ -1842,7 +1840,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
}
/* all raid6 handling here */
if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) {
/*
* single failure, rebuild from parity raid5
* style
@@ -1874,8 +1872,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
* here due to a crc mismatch and we can't give them the
* data they want
*/
if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
if (rbio->bbio->raid_map[faila] ==
if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
if (rbio->bioc->raid_map[faila] ==
RAID5_P_STRIPE) {
err = BLK_STS_IOERR;
goto cleanup;
@@ -1887,7 +1885,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
goto pstripe;
}
if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
raid6_datap_recov(rbio->real_stripes,
PAGE_SIZE, faila, pointers);
} else {
@@ -2006,7 +2004,7 @@ static void raid_recover_end_io(struct bio *bio)
if (!atomic_dec_and_test(&rbio->stripes_pending))
return;
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
rbio_orig_end_io(rbio, BLK_STS_IOERR);
else
__raid_recover_end_io(rbio);
@@ -2074,7 +2072,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
* were up to date, or we might have no bios to read because
* the devices were gone.
*/
if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
__raid_recover_end_io(rbio);
return 0;
} else {
@@ -2083,8 +2081,8 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
}
/*
* the bbio may be freed once we submit the last bio. Make sure
* not to touch it after that
* The bioc may be freed once we submit the last bio. Make sure not to
* touch it after that.
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
while ((bio = bio_list_pop(&bio_list))) {
@@ -2092,7 +2090,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
bio->bi_end_io = raid_recover_end_io;
bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
submit_bio(bio);
}
@@ -2116,22 +2114,22 @@ cleanup:
* so we assume the bio they send down corresponds to a failed part
* of the drive.
*/
int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
struct btrfs_bio *bbio, u64 stripe_len,
int mirror_num, int generic_io)
int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
u64 stripe_len, int mirror_num, int generic_io)
{
struct btrfs_fs_info *fs_info = bioc->fs_info;
struct btrfs_raid_bio *rbio;
int ret;
if (generic_io) {
ASSERT(bbio->mirror_num == mirror_num);
btrfs_io_bio(bio)->mirror_num = mirror_num;
ASSERT(bioc->mirror_num == mirror_num);
btrfs_bio(bio)->mirror_num = mirror_num;
}
rbio = alloc_rbio(fs_info, bbio, stripe_len);
rbio = alloc_rbio(fs_info, bioc, stripe_len);
if (IS_ERR(rbio)) {
if (generic_io)
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
return PTR_ERR(rbio);
}
@@ -2142,11 +2140,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) {
btrfs_warn(fs_info,
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)",
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
__func__, bio->bi_iter.bi_sector << 9,
(u64)bio->bi_iter.bi_size, bbio->map_type);
(u64)bio->bi_iter.bi_size, bioc->map_type);
if (generic_io)
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
kfree(rbio);
return -EIO;
}
@@ -2155,7 +2153,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
btrfs_bio_counter_inc_noblocked(fs_info);
rbio->generic_bio_cnt = 1;
} else {
btrfs_get_bbio(bbio);
btrfs_get_bioc(bioc);
}
/*
@@ -2214,23 +2212,23 @@ static void read_rebuild_work(struct btrfs_work *work)
/*
* The following code is used to scrub/replace the parity stripe
*
* Caller must have already increased bio_counter for getting @bbio.
* Caller must have already increased bio_counter for getting @bioc.
*
* Note: We need make sure all the pages that add into the scrub/replace
* raid bio are correct and not be changed during the scrub/replace. That
* is those pages just hold metadata or file data with checksum.
*/
struct btrfs_raid_bio *
raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
struct btrfs_bio *bbio, u64 stripe_len,
struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors)
struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
struct btrfs_io_context *bioc,
u64 stripe_len, struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors)
{
struct btrfs_fs_info *fs_info = bioc->fs_info;
struct btrfs_raid_bio *rbio;
int i;
rbio = alloc_rbio(fs_info, bbio, stripe_len);
rbio = alloc_rbio(fs_info, bioc, stripe_len);
if (IS_ERR(rbio))
return NULL;
bio_list_add(&rbio->bio_list, bio);
@@ -2242,12 +2240,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
/*
* After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
* After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted
* to the end position, so this search can start from the first parity
* stripe.
*/
for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
if (bbio->stripes[i].dev == scrub_dev) {
if (bioc->stripes[i].dev == scrub_dev) {
rbio->scrubp = i;
break;
}
@@ -2260,7 +2258,7 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
/*
* We have already increased bio_counter when getting bbio, record it
* We have already increased bio_counter when getting bioc, record it
* so we can free it at rbio_orig_end_io().
*/
rbio->generic_bio_cnt = 1;
@@ -2275,10 +2273,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
int stripe_offset;
int index;
ASSERT(logical >= rbio->bbio->raid_map[0]);
ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
ASSERT(logical >= rbio->bioc->raid_map[0]);
ASSERT(logical + PAGE_SIZE <= rbio->bioc->raid_map[0] +
rbio->stripe_len * rbio->nr_data);
stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
index = stripe_offset >> PAGE_SHIFT;
rbio->bio_pages[index] = page;
}
@@ -2312,7 +2310,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check)
{
struct btrfs_bio *bbio = rbio->bbio;
struct btrfs_io_context *bioc = rbio->bioc;
void **pointers = rbio->finish_pointers;
unsigned long *pbitmap = rbio->finish_pbitmap;
int nr_data = rbio->nr_data;
@@ -2335,7 +2333,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
else
BUG();
if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) {
is_replace = 1;
bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
}
@@ -2435,7 +2433,7 @@ writeback:
page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
ret = rbio_add_io_page(rbio, &bio_list, page,
bbio->tgtdev_map[rbio->scrubp],
bioc->tgtdev_map[rbio->scrubp],
pagenr, rbio->stripe_len);
if (ret)
goto cleanup;
@@ -2483,7 +2481,7 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
*/
static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
{
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
goto cleanup;
if (rbio->faila >= 0 || rbio->failb >= 0) {
@@ -2504,7 +2502,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
* the data, so the capability of the repair is declined.
* (In the case of RAID5, we can not repair anything)
*/
if (dfail > rbio->bbio->max_errors - 1)
if (dfail > rbio->bioc->max_errors - 1)
goto cleanup;
/*
@@ -2625,8 +2623,8 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
}
/*
* the bbio may be freed once we submit the last bio. Make sure
* not to touch it after that
* The bioc may be freed once we submit the last bio. Make sure not to
* touch it after that.
*/
atomic_set(&rbio->stripes_pending, bios_to_read);
while ((bio = bio_list_pop(&bio_list))) {
@@ -2634,7 +2632,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
bio->bi_end_io = raid56_parity_scrub_end_io;
bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
submit_bio(bio);
}
@@ -2670,12 +2668,13 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
/* The following code is used for dev replace of a missing RAID 5/6 device. */
struct btrfs_raid_bio *
raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
struct btrfs_bio *bbio, u64 length)
raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
u64 length)
{
struct btrfs_fs_info *fs_info = bioc->fs_info;
struct btrfs_raid_bio *rbio;
rbio = alloc_rbio(fs_info, bbio, length);
rbio = alloc_rbio(fs_info, bioc, length);
if (IS_ERR(rbio))
return NULL;
@@ -2695,7 +2694,7 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
}
/*
* When we get bbio, we have already increased bio_counter, record it
* When we get bioc, we have already increased bio_counter, record it
* so we can free it at rbio_orig_end_io()
*/
rbio->generic_bio_cnt = 1;

View File

@@ -30,25 +30,23 @@ static inline int nr_data_stripes(const struct map_lookup *map)
struct btrfs_raid_bio;
struct btrfs_device;
int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
struct btrfs_bio *bbio, u64 stripe_len,
int mirror_num, int generic_io);
int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
struct btrfs_bio *bbio, u64 stripe_len);
int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
u64 stripe_len, int mirror_num, int generic_io);
int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc,
u64 stripe_len);
void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
u64 logical);
struct btrfs_raid_bio *
raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
struct btrfs_bio *bbio, u64 stripe_len,
struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors);
struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
struct btrfs_io_context *bioc, u64 stripe_len,
struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
struct btrfs_raid_bio *
raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
struct btrfs_bio *bbio, u64 length);
raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
u64 length);
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);

View File

@@ -227,7 +227,7 @@ start_machine:
}
static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
struct btrfs_bio *bbio)
struct btrfs_io_context *bioc)
{
struct btrfs_fs_info *fs_info = dev->fs_info;
int ret;
@@ -275,11 +275,11 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
kref_init(&zone->refcnt);
zone->elems = 0;
zone->device = dev; /* our device always sits at index 0 */
for (i = 0; i < bbio->num_stripes; ++i) {
for (i = 0; i < bioc->num_stripes; ++i) {
/* bounds have already been checked */
zone->devs[i] = bbio->stripes[i].dev;
zone->devs[i] = bioc->stripes[i].dev;
}
zone->ndevs = bbio->num_stripes;
zone->ndevs = bioc->num_stripes;
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&dev->reada_zones,
@@ -309,7 +309,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
int ret;
struct reada_extent *re = NULL;
struct reada_extent *re_exist = NULL;
struct btrfs_bio *bbio = NULL;
struct btrfs_io_context *bioc = NULL;
struct btrfs_device *dev;
struct btrfs_device *prev_dev;
u64 length;
@@ -345,28 +345,28 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
*/
length = fs_info->nodesize;
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&length, &bbio, 0);
if (ret || !bbio || length < fs_info->nodesize)
&length, &bioc, 0);
if (ret || !bioc || length < fs_info->nodesize)
goto error;
if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
if (bioc->num_stripes > BTRFS_MAX_MIRRORS) {
btrfs_err(fs_info,
"readahead: more than %d copies not supported",
BTRFS_MAX_MIRRORS);
goto error;
}
real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
for (nzones = 0; nzones < real_stripes; ++nzones) {
struct reada_zone *zone;
dev = bbio->stripes[nzones].dev;
dev = bioc->stripes[nzones].dev;
/* cannot read ahead on missing device. */
if (!dev->bdev)
continue;
zone = reada_find_zone(dev, logical, bbio);
zone = reada_find_zone(dev, logical, bioc);
if (!zone)
continue;
@@ -464,7 +464,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
if (!have_zone)
goto error;
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
return re;
error:
@@ -488,7 +488,7 @@ error:
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
kfree(re);
return re_exist;
}

View File

@@ -678,10 +678,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
if (generic_ref->type == BTRFS_REF_METADATA) {
if (!parent)
ref_root = generic_ref->tree_ref.root;
ref_root = generic_ref->tree_ref.owning_root;
owner = generic_ref->tree_ref.level;
} else if (!parent) {
ref_root = generic_ref->data_ref.ref_root;
ref_root = generic_ref->data_ref.owning_root;
owner = generic_ref->data_ref.ino;
offset = generic_ref->data_ref.offset;
}

View File

@@ -138,7 +138,7 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
}
btrfs_page_set_uptodate(fs_info, page, file_offset, block_size);
ClearPageChecked(page);
btrfs_page_clear_checked(fs_info, page, file_offset, block_size);
btrfs_page_set_dirty(fs_info, page, file_offset, block_size);
out_unlock:
if (page) {
@@ -649,7 +649,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
struct inode *dst, u64 dst_loff)
{
int ret;
int ret = 0;
u64 i, tail_len, chunk_count;
struct btrfs_root *root_dst = BTRFS_I(dst)->root;

View File

@@ -25,6 +25,7 @@
#include "backref.h"
#include "misc.h"
#include "subpage.h"
#include "zoned.h"
/*
* Relocation overview
@@ -1145,9 +1146,9 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(leaf, fi);
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
num_bytes, parent);
ref.real_root = root->root_key.objectid;
btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
key.objectid, key.offset);
key.objectid, key.offset,
root->root_key.objectid, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1156,9 +1157,9 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
num_bytes, parent);
ref.real_root = root->root_key.objectid;
btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
key.objectid, key.offset);
key.objectid, key.offset,
root->root_key.objectid, false);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1367,8 +1368,8 @@ again:
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
blocksize, path->nodes[level]->start);
ref.skip_qgroup = true;
btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
0, true);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1376,8 +1377,8 @@ again:
}
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
blocksize, 0);
ref.skip_qgroup = true;
btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0,
true);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1386,8 +1387,8 @@ again:
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
blocksize, path->nodes[level]->start);
btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
ref.skip_qgroup = true;
btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
0, true);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1396,8 +1397,8 @@ again:
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
blocksize, 0);
btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
ref.skip_qgroup = true;
btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid,
0, true);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -2473,9 +2474,9 @@ static int do_relocation(struct btrfs_trans_handle *trans,
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
node->eb->start, blocksize,
upper->eb->start);
ref.real_root = root->root_key.objectid;
btrfs_init_tree_ref(&ref, node->level,
btrfs_header_owner(upper->eb));
btrfs_header_owner(upper->eb),
root->root_key.objectid, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (!ret)
ret = btrfs_drop_subtree(trans, root, eb,
@@ -2691,8 +2692,12 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
list_add_tail(&node->list, &rc->backref_cache.changed);
} else {
path->lowest_level = node->level;
if (root == root->fs_info->chunk_root)
btrfs_reserve_chunk_metadata(trans, false);
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
btrfs_release_path(path);
if (root == root->fs_info->chunk_root)
btrfs_trans_release_chunk_metadata(trans);
if (ret > 0)
ret = 0;
}
@@ -2852,31 +2857,6 @@ static noinline_for_stack int prealloc_file_extent_cluster(
if (ret)
return ret;
/*
* On a zoned filesystem, we cannot preallocate the file region.
* Instead, we dirty and fiemap_write the region.
*/
if (btrfs_is_zoned(inode->root->fs_info)) {
struct btrfs_root *root = inode->root;
struct btrfs_trans_handle *trans;
end = cluster->end - offset + 1;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
i_size_write(&inode->vfs_inode, end);
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
return btrfs_end_transaction(trans);
}
btrfs_inode_lock(&inode->vfs_inode, 0);
for (nr = 0; nr < cluster->nr; nr++) {
start = cluster->boundary[nr] - offset;
@@ -2903,9 +2883,8 @@ static noinline_for_stack int prealloc_file_extent_cluster(
return ret;
}
static noinline_for_stack
int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
u64 block_start)
static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode,
u64 start, u64 end, u64 block_start)
{
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em;
@@ -3084,7 +3063,6 @@ release_page:
static int relocate_file_extent_cluster(struct inode *inode,
struct file_extent_cluster *cluster)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 offset = BTRFS_I(inode)->index_cnt;
unsigned long index;
unsigned long last_index;
@@ -3105,7 +3083,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
file_ra_state_init(ra, inode->i_mapping);
ret = setup_extent_mapping(inode, cluster->start - offset,
ret = setup_relocation_extent_mapping(inode, cluster->start - offset,
cluster->end - offset, cluster->start);
if (ret)
goto out;
@@ -3114,8 +3092,6 @@ static int relocate_file_extent_cluster(struct inode *inode,
for (index = (cluster->start - offset) >> PAGE_SHIFT;
index <= last_index && !ret; index++)
ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
if (btrfs_is_zoned(fs_info) && !ret)
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (ret == 0)
WARN_ON(cluster_nr != cluster->nr);
out:
@@ -3770,12 +3746,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_inode_item *item;
struct extent_buffer *leaf;
u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
int ret;
if (btrfs_is_zoned(trans->fs_info))
flags &= ~BTRFS_INODE_PREALLOC;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -3790,7 +3762,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
btrfs_set_inode_generation(leaf, item, 1);
btrfs_set_inode_size(leaf, item, 0);
btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, item, flags);
btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
BTRFS_INODE_PREALLOC);
btrfs_mark_buffer_dirty(leaf);
out:
btrfs_free_path(path);
@@ -4063,6 +4036,9 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
rc->block_group->start,
rc->block_group->length);
ret = btrfs_zone_finish(rc->block_group);
WARN_ON(ret && ret != -EAGAIN);
while (1) {
int finishes_stage;
@@ -4386,8 +4362,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
if (!rc)
return 0;
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
BUG_ON(rc->stage == UPDATE_DATA_PTRS && btrfs_is_data_reloc_root(root));
level = btrfs_header_level(buf);
if (btrfs_header_generation(buf) <=

View File

@@ -57,7 +57,7 @@ struct scrub_ctx;
struct scrub_recover {
refcount_t refs;
struct btrfs_bio *bbio;
struct btrfs_io_context *bioc;
u64 map_length;
};
@@ -254,7 +254,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx);
static inline int scrub_is_page_on_raid56(struct scrub_page *spage)
{
return spage->recover &&
(spage->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
(spage->recover->bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@@ -798,7 +798,7 @@ static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
{
if (refcount_dec_and_test(&recover->refs)) {
btrfs_bio_counter_dec(fs_info);
btrfs_put_bbio(recover->bbio);
btrfs_put_bioc(recover->bioc);
kfree(recover);
}
}
@@ -1027,8 +1027,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sblock_other = sblocks_for_recheck + mirror_index;
} else {
struct scrub_recover *r = sblock_bad->pagev[0]->recover;
int max_allowed = r->bbio->num_stripes -
r->bbio->num_tgtdevs;
int max_allowed = r->bioc->num_stripes - r->bioc->num_tgtdevs;
if (mirror_index >= max_allowed)
break;
@@ -1218,14 +1217,14 @@ out:
return 0;
}
static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
{
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
return 2;
else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
return 3;
else
return (int)bbio->num_stripes;
return (int)bioc->num_stripes;
}
static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
@@ -1269,7 +1268,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
u64 flags = original_sblock->pagev[0]->flags;
u64 have_csum = original_sblock->pagev[0]->have_csum;
struct scrub_recover *recover;
struct btrfs_bio *bbio;
struct btrfs_io_context *bioc;
u64 sublen;
u64 mapped_length;
u64 stripe_offset;
@@ -1288,7 +1287,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
while (length > 0) {
sublen = min_t(u64, length, fs_info->sectorsize);
mapped_length = sublen;
bbio = NULL;
bioc = NULL;
/*
* With a length of sectorsize, each returned stripe represents
@@ -1296,27 +1295,27 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
*/
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
logical, &mapped_length, &bbio);
if (ret || !bbio || mapped_length < sublen) {
btrfs_put_bbio(bbio);
logical, &mapped_length, &bioc);
if (ret || !bioc || mapped_length < sublen) {
btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
return -EIO;
}
recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
if (!recover) {
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
return -ENOMEM;
}
refcount_set(&recover->refs, 1);
recover->bbio = bbio;
recover->bioc = bioc;
recover->map_length = mapped_length;
BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS);
for (mirror_index = 0; mirror_index < nmirrors;
mirror_index++) {
@@ -1348,17 +1347,17 @@ leave_nomem:
sctx->fs_info->csum_size);
scrub_stripe_index_and_offset(logical,
bbio->map_type,
bbio->raid_map,
bioc->map_type,
bioc->raid_map,
mapped_length,
bbio->num_stripes -
bbio->num_tgtdevs,
bioc->num_stripes -
bioc->num_tgtdevs,
mirror_index,
&stripe_index,
&stripe_offset);
spage->physical = bbio->stripes[stripe_index].physical +
spage->physical = bioc->stripes[stripe_index].physical +
stripe_offset;
spage->dev = bbio->stripes[stripe_index].dev;
spage->dev = bioc->stripes[stripe_index].dev;
BUG_ON(page_index >= original_sblock->page_count);
spage->physical_for_dev_replace =
@@ -1401,7 +1400,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
bio->bi_end_io = scrub_bio_wait_endio;
mirror_num = spage->sblock->pagev[0]->mirror_num;
ret = raid56_parity_recover(fs_info, bio, spage->recover->bbio,
ret = raid56_parity_recover(bio, spage->recover->bioc,
spage->recover->map_length,
mirror_num, 0);
if (ret)
@@ -1423,7 +1422,7 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
if (!first_page->dev->bdev)
goto out;
bio = btrfs_io_bio_alloc(BIO_MAX_VECS);
bio = btrfs_bio_alloc(BIO_MAX_VECS);
bio_set_dev(bio, first_page->dev->bdev);
for (page_num = 0; page_num < sblock->page_count; page_num++) {
@@ -1480,7 +1479,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}
WARN_ON(!spage->page);
bio = btrfs_io_bio_alloc(1);
bio = btrfs_bio_alloc(1);
bio_set_dev(bio, spage->dev->bdev);
bio_add_page(bio, spage->page, fs_info->sectorsize, 0);
@@ -1562,7 +1561,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
}
bio = btrfs_io_bio_alloc(1);
bio = btrfs_bio_alloc(1);
bio_set_dev(bio, spage_bad->dev->bdev);
bio->bi_iter.bi_sector = spage_bad->physical >> 9;
bio->bi_opf = REQ_OP_WRITE;
@@ -1676,7 +1675,7 @@ again:
sbio->dev = sctx->wr_tgtdev;
bio = sbio->bio;
if (!bio) {
bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
bio = btrfs_bio_alloc(sctx->pages_per_wr_bio);
sbio->bio = bio;
}
@@ -2102,7 +2101,7 @@ again:
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
bio = btrfs_bio_alloc(sctx->pages_per_rd_bio);
sbio->bio = bio;
}
@@ -2203,7 +2202,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
struct btrfs_fs_info *fs_info = sctx->fs_info;
u64 length = sblock->page_count * PAGE_SIZE;
u64 logical = sblock->pagev[0]->logical;
struct btrfs_bio *bbio = NULL;
struct btrfs_io_context *bioc = NULL;
struct bio *bio;
struct btrfs_raid_bio *rbio;
int ret;
@@ -2211,27 +2210,27 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&length, &bbio);
if (ret || !bbio || !bbio->raid_map)
goto bbio_out;
&length, &bioc);
if (ret || !bioc || !bioc->raid_map)
goto bioc_out;
if (WARN_ON(!sctx->is_dev_replace ||
!(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
!(bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
/*
* We shouldn't be scrubbing a missing device. Even for dev
* replace, we should only get here for RAID 5/6. We either
* managed to mount something with no mirrors remaining or
* there's a bug in scrub_remap_extent()/btrfs_map_block().
*/
goto bbio_out;
goto bioc_out;
}
bio = btrfs_io_bio_alloc(0);
bio = btrfs_bio_alloc(BIO_MAX_VECS);
bio->bi_iter.bi_sector = logical >> 9;
bio->bi_private = sblock;
bio->bi_end_io = scrub_missing_raid56_end_io;
rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
rbio = raid56_alloc_missing_rbio(bio, bioc, length);
if (!rbio)
goto rbio_out;
@@ -2249,9 +2248,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
rbio_out:
bio_put(bio);
bbio_out:
bioc_out:
btrfs_bio_counter_dec(fs_info);
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
spin_unlock(&sctx->stat_lock);
@@ -2826,7 +2825,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct bio *bio;
struct btrfs_raid_bio *rbio;
struct btrfs_bio *bbio = NULL;
struct btrfs_io_context *bioc = NULL;
u64 length;
int ret;
@@ -2838,17 +2837,17 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
&length, &bbio);
if (ret || !bbio || !bbio->raid_map)
goto bbio_out;
&length, &bioc);
if (ret || !bioc || !bioc->raid_map)
goto bioc_out;
bio = btrfs_io_bio_alloc(0);
bio = btrfs_bio_alloc(BIO_MAX_VECS);
bio->bi_iter.bi_sector = sparity->logic_start >> 9;
bio->bi_private = sparity;
bio->bi_end_io = scrub_parity_bio_endio;
rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
length, sparity->scrub_dev,
rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, length,
sparity->scrub_dev,
sparity->dbitmap,
sparity->nsectors);
if (!rbio)
@@ -2860,9 +2859,9 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
rbio_out:
bio_put(bio);
bbio_out:
bioc_out:
btrfs_bio_counter_dec(fs_info);
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
spin_lock(&sctx->stat_lock);
@@ -2901,7 +2900,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_root *csum_root = fs_info->csum_root;
struct btrfs_extent_item *extent;
struct btrfs_bio *bbio = NULL;
struct btrfs_io_context *bioc = NULL;
u64 flags;
int ret;
int slot;
@@ -3044,22 +3043,22 @@ again:
extent_len);
mapped_length = extent_len;
bbio = NULL;
bioc = NULL;
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
extent_logical, &mapped_length, &bbio,
extent_logical, &mapped_length, &bioc,
0);
if (!ret) {
if (!bbio || mapped_length < extent_len)
if (!bioc || mapped_length < extent_len)
ret = -EIO;
}
if (ret) {
btrfs_put_bbio(bbio);
btrfs_put_bioc(bioc);
goto out;
}
extent_physical = bbio->stripes[0].physical;
extent_mirror_num = bbio->mirror_num;
extent_dev = bbio->stripes[0].dev;
btrfs_put_bbio(bbio);
extent_physical = bioc->stripes[0].physical;
extent_mirror_num = bioc->mirror_num;
extent_dev = bioc->stripes[0].dev;
btrfs_put_bioc(bioc);
ret = btrfs_lookup_csums_range(csum_root,
extent_logical,
@@ -3956,7 +3955,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
int ret;
struct btrfs_fs_info *fs_info = sctx->fs_info;
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
if (BTRFS_FS_ERROR(fs_info))
return -EROFS;
/* Seed devices of a new filesystem has their own generation. */
@@ -4068,6 +4067,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace)
{
struct btrfs_dev_lookup_args args = { .devid = devid };
struct scrub_ctx *sctx;
int ret;
struct btrfs_device *dev;
@@ -4115,7 +4115,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
goto out_free_ctx;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
dev = btrfs_find_device(fs_info->fs_devices, &args);
if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
!is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -4288,11 +4288,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress)
{
struct btrfs_dev_lookup_args args = { .devid = devid };
struct btrfs_device *dev;
struct scrub_ctx *sctx = NULL;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
dev = btrfs_find_device(fs_info->fs_devices, &args);
if (dev)
sctx = dev->scrub_ctx;
if (sctx)
@@ -4309,20 +4310,20 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
int *extent_mirror_num)
{
u64 mapped_length;
struct btrfs_bio *bbio = NULL;
struct btrfs_io_context *bioc = NULL;
int ret;
mapped_length = extent_len;
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
&mapped_length, &bbio, 0);
if (ret || !bbio || mapped_length < extent_len ||
!bbio->stripes[0].dev->bdev) {
btrfs_put_bbio(bbio);
&mapped_length, &bioc, 0);
if (ret || !bioc || mapped_length < extent_len ||
!bioc->stripes[0].dev->bdev) {
btrfs_put_bioc(bioc);
return;
}
*extent_physical = bbio->stripes[0].physical;
*extent_mirror_num = bbio->mirror_num;
*extent_dev = bbio->stripes[0].dev;
btrfs_put_bbio(bbio);
*extent_physical = bioc->stripes[0].physical;
*extent_mirror_num = bioc->mirror_num;
*extent_dev = bioc->stripes[0].dev;
btrfs_put_bioc(bioc);
}

View File

@@ -84,6 +84,8 @@ struct send_ctx {
u64 total_send_size;
u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */
/* Protocol version compatibility requested */
u32 proto;
struct btrfs_root *send_root;
struct btrfs_root *parent_root;
@@ -312,6 +314,16 @@ static void inconsistent_snapshot_error(struct send_ctx *sctx,
sctx->parent_root->root_key.objectid : 0));
}
__maybe_unused
static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
{
switch (sctx->proto) {
case 1: return cmd < __BTRFS_SEND_C_MAX_V1;
case 2: return cmd < __BTRFS_SEND_C_MAX_V2;
default: return false;
}
}
static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
static struct waiting_dir_move *
@@ -2720,19 +2732,12 @@ static int send_create_inode_if_needed(struct send_ctx *sctx)
if (S_ISDIR(sctx->cur_inode_mode)) {
ret = did_create_dir(sctx, sctx->cur_ino);
if (ret < 0)
goto out;
if (ret) {
ret = 0;
goto out;
}
return ret;
else if (ret > 0)
return 0;
}
ret = send_create_inode(sctx, sctx->cur_ino);
if (ret < 0)
goto out;
out:
return ret;
return send_create_inode(sctx, sctx->cur_ino);
}
struct recorded_ref {
@@ -7276,6 +7281,17 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
sctx->flags = arg->flags;
if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
if (arg->version > BTRFS_SEND_STREAM_VERSION) {
ret = -EPROTO;
goto out;
}
/* Zero means "use the highest version" */
sctx->proto = arg->version ?: BTRFS_SEND_STREAM_VERSION;
} else {
sctx->proto = 1;
}
sctx->send_filp = fget(arg->send_fd);
if (!sctx->send_filp) {
ret = -EBADF;

View File

@@ -48,6 +48,7 @@ struct btrfs_tlv_header {
enum btrfs_send_cmd {
BTRFS_SEND_C_UNSPEC,
/* Version 1 */
BTRFS_SEND_C_SUBVOL,
BTRFS_SEND_C_SNAPSHOT,
@@ -76,6 +77,12 @@ enum btrfs_send_cmd {
BTRFS_SEND_C_END,
BTRFS_SEND_C_UPDATE_EXTENT,
__BTRFS_SEND_C_MAX_V1,
/* Version 2 */
__BTRFS_SEND_C_MAX_V2,
/* End */
__BTRFS_SEND_C_MAX,
};
#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)

View File

@@ -885,6 +885,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
{
struct reserve_ticket *ticket;
u64 tickets_id = space_info->tickets_id;
const bool aborted = BTRFS_FS_ERROR(fs_info);
trace_btrfs_fail_all_tickets(fs_info, space_info);
@@ -898,16 +899,19 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
if (ticket->steal &&
if (!aborted && ticket->steal &&
steal_from_global_rsv(fs_info, space_info, ticket))
return true;
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
if (!aborted && btrfs_test_opt(fs_info, ENOSPC_DEBUG))
btrfs_info(fs_info, "failing ticket with %llu bytes",
ticket->bytes);
remove_ticket(space_info, ticket);
ticket->error = -ENOSPC;
if (aborted)
ticket->error = -EIO;
else
ticket->error = -ENOSPC;
wake_up(&ticket->wait);
/*
@@ -916,7 +920,8 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
* here to see if we can make progress with the next ticket in
* the list.
*/
btrfs_try_granting_tickets(fs_info, space_info);
if (!aborted)
btrfs_try_granting_tickets(fs_info, space_info);
}
return (tickets_id != space_info->tickets_id);
}
@@ -1172,6 +1177,10 @@ static void btrfs_async_reclaim_data_space(struct work_struct *work)
spin_unlock(&space_info->lock);
return;
}
/* Something happened, fail everything and bail. */
if (BTRFS_FS_ERROR(fs_info))
goto aborted_fs;
last_tickets_id = space_info->tickets_id;
spin_unlock(&space_info->lock);
}
@@ -1202,9 +1211,20 @@ static void btrfs_async_reclaim_data_space(struct work_struct *work)
} else {
flush_state = 0;
}
/* Something happened, fail everything and bail. */
if (BTRFS_FS_ERROR(fs_info))
goto aborted_fs;
}
spin_unlock(&space_info->lock);
}
return;
aborted_fs:
maybe_fail_all_tickets(fs_info, space_info);
space_info->flush = 0;
spin_unlock(&space_info->lock);
}
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)

View File

@@ -63,11 +63,41 @@
* This means a slightly higher tree locking latency.
*/
void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize)
{
unsigned int cur = 0;
unsigned int nr_bits;
ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize));
nr_bits = PAGE_SIZE / sectorsize;
subpage_info->bitmap_nr_bits = nr_bits;
subpage_info->uptodate_offset = cur;
cur += nr_bits;
subpage_info->error_offset = cur;
cur += nr_bits;
subpage_info->dirty_offset = cur;
cur += nr_bits;
subpage_info->writeback_offset = cur;
cur += nr_bits;
subpage_info->ordered_offset = cur;
cur += nr_bits;
subpage_info->checked_offset = cur;
cur += nr_bits;
subpage_info->total_nr_bits = cur;
}
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
struct page *page, enum btrfs_subpage_type type)
{
struct btrfs_subpage *subpage = NULL;
int ret;
struct btrfs_subpage *subpage;
/*
* We have cases like a dummy extent buffer page, which is not mappped
@@ -75,13 +105,15 @@ int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
*/
if (page->mapping)
ASSERT(PageLocked(page));
/* Either not subpage, or the page already has private attached */
if (fs_info->sectorsize == PAGE_SIZE || PagePrivate(page))
return 0;
ret = btrfs_alloc_subpage(fs_info, &subpage, type);
if (ret < 0)
return ret;
subpage = btrfs_alloc_subpage(fs_info, type);
if (IS_ERR(subpage))
return PTR_ERR(subpage);
attach_page_private(page, subpage);
return 0;
}
@@ -100,24 +132,28 @@ void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
btrfs_free_subpage(subpage);
}
int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
struct btrfs_subpage **ret,
enum btrfs_subpage_type type)
struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
enum btrfs_subpage_type type)
{
if (fs_info->sectorsize == PAGE_SIZE)
return 0;
struct btrfs_subpage *ret;
unsigned int real_size;
*ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS);
if (!*ret)
return -ENOMEM;
spin_lock_init(&(*ret)->lock);
ASSERT(fs_info->sectorsize < PAGE_SIZE);
real_size = struct_size(ret, bitmaps,
BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits));
ret = kzalloc(real_size, GFP_NOFS);
if (!ret)
return ERR_PTR(-ENOMEM);
spin_lock_init(&ret->lock);
if (type == BTRFS_SUBPAGE_METADATA) {
atomic_set(&(*ret)->eb_refs, 0);
atomic_set(&ret->eb_refs, 0);
} else {
atomic_set(&(*ret)->readers, 0);
atomic_set(&(*ret)->writers, 0);
atomic_set(&ret->readers, 0);
atomic_set(&ret->writers, 0);
}
return 0;
return ret;
}
void btrfs_free_subpage(struct btrfs_subpage *subpage)
@@ -222,8 +258,16 @@ static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len)
u32 orig_len = *len;
*start = max_t(u64, page_offset(page), orig_start);
*len = min_t(u64, page_offset(page) + PAGE_SIZE,
orig_start + orig_len) - *start;
/*
* For certain call sites like btrfs_drop_pages(), we may have pages
* beyond the target range. In that case, just set @len to 0, subpage
* helpers can handle @len == 0 without any problem.
*/
if (page_offset(page) >= orig_start + orig_len)
*len = 0;
else
*len = min_t(u64, page_offset(page) + PAGE_SIZE,
orig_start + orig_len) - *start;
}
void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
@@ -248,6 +292,16 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
btrfs_subpage_assert(fs_info, page, start, len);
/*
* We have call sites passing @lock_page into
* extent_clear_unlock_delalloc() for compression path.
*
* This @locked_page is locked by plain lock_page(), thus its
* subpage::writers is 0. Handle them in a special way.
*/
if (atomic_read(&subpage->writers) == 0)
return true;
ASSERT(atomic_read(&subpage->writers) >= nbits);
return atomic_sub_and_test(nbits, &subpage->writers);
}
@@ -289,37 +343,59 @@ void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
unlock_page(page);
}
/*
* Convert the [start, start + len) range into a u16 bitmap
*
* For example: if start == page_offset() + 16K, len = 16K, we get 0x00f0.
*/
static u16 btrfs_subpage_calc_bitmap(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
static bool bitmap_test_range_all_set(unsigned long *addr, unsigned int start,
unsigned int nbits)
{
const int bit_start = offset_in_page(start) >> fs_info->sectorsize_bits;
const int nbits = len >> fs_info->sectorsize_bits;
unsigned int found_zero;
btrfs_subpage_assert(fs_info, page, start, len);
/*
* Here nbits can be 16, thus can go beyond u16 range. We make the
* first left shift to be calculate in unsigned long (at least u32),
* then truncate the result to u16.
*/
return (u16)(((1UL << nbits) - 1) << bit_start);
found_zero = find_next_zero_bit(addr, start + nbits, start);
if (found_zero == start + nbits)
return true;
return false;
}
static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned int start,
unsigned int nbits)
{
unsigned int found_set;
found_set = find_next_bit(addr, start + nbits, start);
if (found_set == start + nbits)
return true;
return false;
}
#define subpage_calc_start_bit(fs_info, page, name, start, len) \
({ \
unsigned int start_bit; \
\
btrfs_subpage_assert(fs_info, page, start, len); \
start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
start_bit += fs_info->subpage_info->name##_offset; \
start_bit; \
})
#define subpage_test_bitmap_all_set(fs_info, subpage, name) \
bitmap_test_range_all_set(subpage->bitmaps, \
fs_info->subpage_info->name##_offset, \
fs_info->subpage_info->bitmap_nr_bits)
#define subpage_test_bitmap_all_zero(fs_info, subpage, name) \
bitmap_test_range_all_zero(subpage->bitmaps, \
fs_info->subpage_info->name##_offset, \
fs_info->subpage_info->bitmap_nr_bits)
void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
uptodate, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
subpage->uptodate_bitmap |= tmp;
if (subpage->uptodate_bitmap == U16_MAX)
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate))
SetPageUptodate(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
@@ -328,11 +404,12 @@ void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
uptodate, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
subpage->uptodate_bitmap &= ~tmp;
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
ClearPageUptodate(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
@@ -341,11 +418,12 @@ void btrfs_subpage_set_error(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
error, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
subpage->error_bitmap |= tmp;
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
SetPageError(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
@@ -354,12 +432,13 @@ void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
error, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
subpage->error_bitmap &= ~tmp;
if (subpage->error_bitmap == 0)
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_zero(fs_info, subpage, error))
ClearPageError(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
@@ -368,11 +447,12 @@ void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
dirty, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
subpage->dirty_bitmap |= tmp;
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
spin_unlock_irqrestore(&subpage->lock, flags);
set_page_dirty(page);
}
@@ -391,13 +471,14 @@ bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
dirty, start, len);
unsigned long flags;
bool last = false;
spin_lock_irqsave(&subpage->lock, flags);
subpage->dirty_bitmap &= ~tmp;
if (subpage->dirty_bitmap == 0)
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty))
last = true;
spin_unlock_irqrestore(&subpage->lock, flags);
return last;
@@ -417,11 +498,12 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
writeback, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
subpage->writeback_bitmap |= tmp;
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
set_page_writeback(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
@@ -430,12 +512,13 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
writeback, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
subpage->writeback_bitmap &= ~tmp;
if (subpage->writeback_bitmap == 0) {
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) {
ASSERT(PageWriteback(page));
end_page_writeback(page);
}
@@ -446,11 +529,12 @@ void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
ordered, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
subpage->ordered_bitmap |= tmp;
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
SetPageOrdered(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
@@ -459,15 +543,46 @@ void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
ordered, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
subpage->ordered_bitmap &= ~tmp;
if (subpage->ordered_bitmap == 0)
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered))
ClearPageOrdered(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
checked, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_set(fs_info, subpage, checked))
SetPageChecked(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
checked, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
ClearPageChecked(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
/*
* Unlike set/clear which is dependent on each page status, for test all bits
* are tested in the same way.
@@ -477,12 +592,14 @@ bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
struct page *page, u64 start, u32 len) \
{ \
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; \
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); \
unsigned int start_bit = subpage_calc_start_bit(fs_info, page, \
name, start, len); \
unsigned long flags; \
bool ret; \
\
spin_lock_irqsave(&subpage->lock, flags); \
ret = ((subpage->name##_bitmap & tmp) == tmp); \
ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \
len >> fs_info->sectorsize_bits); \
spin_unlock_irqrestore(&subpage->lock, flags); \
return ret; \
}
@@ -491,6 +608,7 @@ IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
/*
* Note that, in selftests (extent-io-tests), we can have empty fs_info passed
@@ -561,6 +679,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
PageWriteback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered,
PageOrdered);
IMPLEMENT_BTRFS_PAGE_OPS(checked, SetPageChecked, ClearPageChecked, PageChecked);
/*
* Make sure not only the page dirty bit is cleared, but also subpage dirty bit
@@ -579,5 +698,48 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
return;
ASSERT(PagePrivate(page) && page->private);
ASSERT(subpage->dirty_bitmap == 0);
ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty));
}
/*
* Handle different locked pages with different page sizes:
*
* - Page locked by plain lock_page()
* It should not have any subpage::writers count.
* Can be unlocked by unlock_page().
* This is the most common locked page for __extent_writepage() called
* inside extent_write_cache_pages() or extent_write_full_page().
* Rarer cases include the @locked_page from extent_write_locked_range().
*
* - Page locked by lock_delalloc_pages()
* There is only one caller, all pages except @locked_page for
* extent_write_locked_range().
* In this case, we have to call subpage helper to handle the case.
*/
void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
u64 start, u32 len)
{
struct btrfs_subpage *subpage;
ASSERT(PageLocked(page));
/* For regular page size case, we just unlock the page */
if (fs_info->sectorsize == PAGE_SIZE)
return unlock_page(page);
ASSERT(PagePrivate(page) && page->private);
subpage = (struct btrfs_subpage *)page->private;
/*
* For subpage case, there are two types of locked page. With or
* without writers number.
*
* Since we own the page lock, no one else could touch subpage::writers
* and we are safe to do several atomic operations without spinlock.
*/
if (atomic_read(&subpage->writers))
/* No writers, locked by plain lock_page() */
return unlock_page(page);
/* Have writers, use proper subpage helper to end it */
btrfs_page_end_writer_lock(fs_info, page, start, len);
}

View File

@@ -6,10 +6,38 @@
#include <linux/spinlock.h>
/*
* Maximum page size we support is 64K, minimum sector size is 4K, u16 bitmap
* is sufficient. Regular bitmap_* is not used due to size reasons.
* Extra info for subpapge bitmap.
*
* For subpage we pack all uptodate/error/dirty/writeback/ordered bitmaps into
* one larger bitmap.
*
* This structure records how they are organized in the bitmap:
*
* /- uptodate_offset /- error_offset /- dirty_offset
* | | |
* v v v
* |u|u|u|u|........|u|u|e|e|.......|e|e| ... |o|o|
* |<- bitmap_nr_bits ->|
* |<--------------- total_nr_bits ---------------->|
*/
#define BTRFS_SUBPAGE_BITMAP_SIZE 16
struct btrfs_subpage_info {
/* Number of bits for each bitmap */
unsigned int bitmap_nr_bits;
/* Total number of bits for the whole bitmap */
unsigned int total_nr_bits;
/*
* *_start indicates where the bitmap starts, the length is always
* @bitmap_size, which is calculated from PAGE_SIZE / sectorsize.
*/
unsigned int uptodate_offset;
unsigned int error_offset;
unsigned int dirty_offset;
unsigned int writeback_offset;
unsigned int ordered_offset;
unsigned int checked_offset;
};
/*
* Structure to trace status of each sector inside a page, attached to
@@ -18,10 +46,6 @@
struct btrfs_subpage {
/* Common members for both data and metadata pages */
spinlock_t lock;
u16 uptodate_bitmap;
u16 error_bitmap;
u16 dirty_bitmap;
u16 writeback_bitmap;
/*
* Both data and metadata needs to track how many readers are for the
* page.
@@ -38,14 +62,11 @@ struct btrfs_subpage {
* manages whether the subpage can be detached.
*/
atomic_t eb_refs;
/* Structures only used by data */
struct {
atomic_t writers;
/* Tracke pending ordered extent in this sector */
u16 ordered_bitmap;
};
/* Structures only used by data */
atomic_t writers;
};
unsigned long bitmaps[];
};
enum btrfs_subpage_type {
@@ -53,15 +74,15 @@ enum btrfs_subpage_type {
BTRFS_SUBPAGE_DATA,
};
void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize);
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
struct page *page, enum btrfs_subpage_type type);
void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
struct page *page);
/* Allocate additional data where page represents more than one sector */
int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
struct btrfs_subpage **ret,
enum btrfs_subpage_type type);
struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
enum btrfs_subpage_type type);
void btrfs_free_subpage(struct btrfs_subpage *subpage);
void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
@@ -122,11 +143,14 @@ DECLARE_BTRFS_SUBPAGE_OPS(error);
DECLARE_BTRFS_SUBPAGE_OPS(dirty);
DECLARE_BTRFS_SUBPAGE_OPS(writeback);
DECLARE_BTRFS_SUBPAGE_OPS(ordered);
DECLARE_BTRFS_SUBPAGE_OPS(checked);
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len);
void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
struct page *page);
void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
u64 start, u32 len);
#endif

View File

@@ -1705,7 +1705,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
goto error_close_devices;
}
bdev = fs_devices->latest_bdev;
bdev = fs_devices->latest_dev->bdev;
s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
fs_info);
if (IS_ERR(s)) {
@@ -2006,7 +2006,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
} else {
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
if (BTRFS_FS_ERROR(fs_info)) {
btrfs_err(fs_info,
"Remounting read-write after error is not allowed");
ret = -EINVAL;
@@ -2463,30 +2463,16 @@ static int btrfs_unfreeze(struct super_block *sb)
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
struct btrfs_device *dev, *first_dev = NULL;
/*
* Lightweight locking of the devices. We should not need
* device_list_mutex here as we only read the device data and the list
* is protected by RCU. Even if a device is deleted during the list
* traversals, we'll get valid data, the freeing callback will wait at
* least until the rcu_read_unlock.
* There should be always a valid pointer in latest_dev, it may be stale
* for a short moment in case it's being deleted but still valid until
* the end of RCU grace period.
*/
rcu_read_lock();
list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue;
if (!dev->name)
continue;
if (!first_dev || dev->devid < first_dev->devid)
first_dev = dev;
}
if (first_dev)
seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
else
WARN_ON(1);
seq_escape(m, rcu_str_deref(fs_info->fs_devices->latest_dev->name), " \t\n\\");
rcu_read_unlock();
return 0;
}

View File

@@ -177,7 +177,7 @@ static ssize_t btrfs_feature_attr_show(struct kobject *kobj,
} else
val = can_modify_feature(fa);
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
return sysfs_emit(buf, "%d\n", val);
}
static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
@@ -330,7 +330,7 @@ static const struct attribute_group btrfs_feature_attr_group = {
static ssize_t rmdir_subvol_show(struct kobject *kobj,
struct kobj_attribute *ka, char *buf)
{
return scnprintf(buf, PAGE_SIZE, "0\n");
return sysfs_emit(buf, "0\n");
}
BTRFS_ATTR(static_feature, rmdir_subvol, rmdir_subvol_show);
@@ -345,12 +345,12 @@ static ssize_t supported_checksums_show(struct kobject *kobj,
* This "trick" only works as long as 'enum btrfs_csum_type' has
* no holes in it
*/
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
(i == 0 ? "" : " "), btrfs_super_csum_name(i));
ret += sysfs_emit_at(buf, ret, "%s%s", (i == 0 ? "" : " "),
btrfs_super_csum_name(i));
}
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
ret += sysfs_emit_at(buf, ret, "\n");
return ret;
}
BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show);
@@ -358,7 +358,7 @@ BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show);
static ssize_t send_stream_version_show(struct kobject *kobj,
struct kobj_attribute *ka, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%d\n", BTRFS_SEND_STREAM_VERSION);
return sysfs_emit(buf, "%d\n", BTRFS_SEND_STREAM_VERSION);
}
BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show);
@@ -378,9 +378,8 @@ static ssize_t supported_rescue_options_show(struct kobject *kobj,
int i;
for (i = 0; i < ARRAY_SIZE(rescue_opts); i++)
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
(i ? " " : ""), rescue_opts[i]);
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
ret += sysfs_emit_at(buf, ret, "%s%s", (i ? " " : ""), rescue_opts[i]);
ret += sysfs_emit_at(buf, ret, "\n");
return ret;
}
BTRFS_ATTR(static_feature, supported_rescue_options,
@@ -394,10 +393,10 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
/* 4K sector size is also supported with 64K page size */
if (PAGE_SIZE == SZ_64K)
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%u ", SZ_4K);
ret += sysfs_emit_at(buf, ret, "%u ", SZ_4K);
/* Only sectorsize == PAGE_SIZE is now supported */
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%lu\n", PAGE_SIZE);
ret += sysfs_emit_at(buf, ret, "%lu\n", PAGE_SIZE);
return ret;
}
@@ -437,7 +436,7 @@ static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%lld\n",
return sysfs_emit(buf, "%lld\n",
atomic64_read(&fs_info->discard_ctl.discardable_bytes));
}
BTRFS_ATTR(discard, discardable_bytes, btrfs_discardable_bytes_show);
@@ -448,7 +447,7 @@ static ssize_t btrfs_discardable_extents_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%d\n",
return sysfs_emit(buf, "%d\n",
atomic_read(&fs_info->discard_ctl.discardable_extents));
}
BTRFS_ATTR(discard, discardable_extents, btrfs_discardable_extents_show);
@@ -459,8 +458,8 @@ static ssize_t btrfs_discard_bitmap_bytes_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%llu\n",
fs_info->discard_ctl.discard_bitmap_bytes);
return sysfs_emit(buf, "%llu\n",
fs_info->discard_ctl.discard_bitmap_bytes);
}
BTRFS_ATTR(discard, discard_bitmap_bytes, btrfs_discard_bitmap_bytes_show);
@@ -470,7 +469,7 @@ static ssize_t btrfs_discard_bytes_saved_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%lld\n",
return sysfs_emit(buf, "%lld\n",
atomic64_read(&fs_info->discard_ctl.discard_bytes_saved));
}
BTRFS_ATTR(discard, discard_bytes_saved, btrfs_discard_bytes_saved_show);
@@ -481,8 +480,8 @@ static ssize_t btrfs_discard_extent_bytes_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%llu\n",
fs_info->discard_ctl.discard_extent_bytes);
return sysfs_emit(buf, "%llu\n",
fs_info->discard_ctl.discard_extent_bytes);
}
BTRFS_ATTR(discard, discard_extent_bytes, btrfs_discard_extent_bytes_show);
@@ -492,8 +491,8 @@ static ssize_t btrfs_discard_iops_limit_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%u\n",
READ_ONCE(fs_info->discard_ctl.iops_limit));
return sysfs_emit(buf, "%u\n",
READ_ONCE(fs_info->discard_ctl.iops_limit));
}
static ssize_t btrfs_discard_iops_limit_store(struct kobject *kobj,
@@ -523,8 +522,8 @@ static ssize_t btrfs_discard_kbps_limit_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%u\n",
READ_ONCE(fs_info->discard_ctl.kbps_limit));
return sysfs_emit(buf, "%u\n",
READ_ONCE(fs_info->discard_ctl.kbps_limit));
}
static ssize_t btrfs_discard_kbps_limit_store(struct kobject *kobj,
@@ -553,8 +552,8 @@ static ssize_t btrfs_discard_max_discard_size_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%llu\n",
READ_ONCE(fs_info->discard_ctl.max_discard_size));
return sysfs_emit(buf, "%llu\n",
READ_ONCE(fs_info->discard_ctl.max_discard_size));
}
static ssize_t btrfs_discard_max_discard_size_store(struct kobject *kobj,
@@ -627,7 +626,7 @@ static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
val = *value_ptr;
if (lock)
spin_unlock(lock);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
return sysfs_emit(buf, "%llu\n", val);
}
static ssize_t global_rsv_size_show(struct kobject *kobj,
@@ -673,7 +672,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
val += block_group->used;
}
up_read(&sinfo->groups_sem);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
return sysfs_emit(buf, "%llu\n", val);
}
/*
@@ -771,7 +770,7 @@ static ssize_t btrfs_label_show(struct kobject *kobj,
ssize_t ret;
spin_lock(&fs_info->super_lock);
ret = scnprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
ret = sysfs_emit(buf, label[0] ? "%s\n" : "%s", label);
spin_unlock(&fs_info->super_lock);
return ret;
@@ -819,7 +818,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
return sysfs_emit(buf, "%u\n", fs_info->super_copy->nodesize);
}
BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
@@ -829,8 +828,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%u\n",
fs_info->super_copy->sectorsize);
return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
}
BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
@@ -840,7 +838,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize);
return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
}
BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
@@ -852,7 +850,7 @@ static ssize_t quota_override_show(struct kobject *kobj,
int quota_override;
quota_override = test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags);
return scnprintf(buf, PAGE_SIZE, "%d\n", quota_override);
return sysfs_emit(buf, "%d\n", quota_override);
}
static ssize_t quota_override_store(struct kobject *kobj,
@@ -890,8 +888,7 @@ static ssize_t btrfs_metadata_uuid_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%pU\n",
fs_info->fs_devices->metadata_uuid);
return sysfs_emit(buf, "%pU\n", fs_info->fs_devices->metadata_uuid);
}
BTRFS_ATTR(, metadata_uuid, btrfs_metadata_uuid_show);
@@ -902,9 +899,9 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
return scnprintf(buf, PAGE_SIZE, "%s (%s)\n",
btrfs_super_csum_name(csum_type),
crypto_shash_driver_name(fs_info->csum_shash));
return sysfs_emit(buf, "%s (%s)\n",
btrfs_super_csum_name(csum_type),
crypto_shash_driver_name(fs_info->csum_shash));
}
BTRFS_ATTR(, checksum, btrfs_checksum_show);
@@ -941,7 +938,7 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
str = "UNKNOWN\n";
break;
}
return scnprintf(buf, PAGE_SIZE, "%s", str);
return sysfs_emit(buf, "%s", str);
}
BTRFS_ATTR(, exclusive_operation, btrfs_exclusive_operation_show);
@@ -950,7 +947,7 @@ static ssize_t btrfs_generation_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
return scnprintf(buf, PAGE_SIZE, "%llu\n", fs_info->generation);
return sysfs_emit(buf, "%llu\n", fs_info->generation);
}
BTRFS_ATTR(, generation, btrfs_generation_show);
@@ -1028,8 +1025,7 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
ssize_t ret;
ret = scnprintf(buf, PAGE_SIZE, "%d\n",
READ_ONCE(fs_info->bg_reclaim_threshold));
ret = sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold));
return ret;
}
@@ -1471,7 +1467,7 @@ static ssize_t btrfs_devinfo_in_fs_metadata_show(struct kobject *kobj,
val = !!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
return sysfs_emit(buf, "%d\n", val);
}
BTRFS_ATTR(devid, in_fs_metadata, btrfs_devinfo_in_fs_metadata_show);
@@ -1484,7 +1480,7 @@ static ssize_t btrfs_devinfo_missing_show(struct kobject *kobj,
val = !!test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
return sysfs_emit(buf, "%d\n", val);
}
BTRFS_ATTR(devid, missing, btrfs_devinfo_missing_show);
@@ -1498,7 +1494,7 @@ static ssize_t btrfs_devinfo_replace_target_show(struct kobject *kobj,
val = !!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
return sysfs_emit(buf, "%d\n", val);
}
BTRFS_ATTR(devid, replace_target, btrfs_devinfo_replace_target_show);
@@ -1509,8 +1505,7 @@ static ssize_t btrfs_devinfo_scrub_speed_max_show(struct kobject *kobj,
struct btrfs_device *device = container_of(kobj, struct btrfs_device,
devid_kobj);
return scnprintf(buf, PAGE_SIZE, "%llu\n",
READ_ONCE(device->scrub_speed_max));
return sysfs_emit(buf, "%llu\n", READ_ONCE(device->scrub_speed_max));
}
static ssize_t btrfs_devinfo_scrub_speed_max_store(struct kobject *kobj,
@@ -1538,7 +1533,7 @@ static ssize_t btrfs_devinfo_writeable_show(struct kobject *kobj,
val = !!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
return sysfs_emit(buf, "%d\n", val);
}
BTRFS_ATTR(devid, writeable, btrfs_devinfo_writeable_show);
@@ -1549,14 +1544,14 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj,
devid_kobj);
if (!device->dev_stats_valid)
return scnprintf(buf, PAGE_SIZE, "invalid\n");
return sysfs_emit(buf, "invalid\n");
/*
* Print all at once so we get a snapshot of all values from the same
* time. Keep them in sync and in order of definition of
* btrfs_dev_stat_values.
*/
return scnprintf(buf, PAGE_SIZE,
return sysfs_emit(buf,
"write_errs %d\n"
"read_errs %d\n"
"flush_errs %d\n"

View File

@@ -60,7 +60,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
key.type = BTRFS_EXTENT_CSUM_KEY;
key.offset = 0;
setup_items_for_insert(root, path, &key, &value_len, 1);
btrfs_setup_item_for_insert(root, path, &key, value_len);
item = btrfs_item_nr(0);
write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0),
value_len);

View File

@@ -112,7 +112,7 @@ static int test_find_delalloc(u32 sectorsize)
*/
set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
start = 0;
end = 0;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
&end);
if (!found) {
@@ -143,7 +143,7 @@ static int test_find_delalloc(u32 sectorsize)
}
set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
start = test_start;
end = 0;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
&end);
if (!found) {
@@ -177,14 +177,14 @@ static int test_find_delalloc(u32 sectorsize)
goto out_bits;
}
start = test_start;
end = 0;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
&end);
if (found) {
test_err("found range when we shouldn't have");
goto out_bits;
}
if (end != (u64)-1) {
if (end != test_start + PAGE_SIZE - 1) {
test_err("did not return the proper end offset");
goto out_bits;
}
@@ -198,7 +198,7 @@ static int test_find_delalloc(u32 sectorsize)
*/
set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
start = test_start;
end = 0;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
&end);
if (!found) {
@@ -233,7 +233,7 @@ static int test_find_delalloc(u32 sectorsize)
/* We unlocked it in the previous test */
lock_page(locked_page);
start = test_start;
end = 0;
end = start + PAGE_SIZE - 1;
/*
* Currently if we fail to find dirty pages in the delalloc range we
* will adjust max_bytes down to PAGE_SIZE and then re-search. If

View File

@@ -33,7 +33,7 @@ static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = start;
setup_items_for_insert(root, &path, &key, &value_len, 1);
btrfs_setup_item_for_insert(root, &path, &key, value_len);
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
btrfs_set_file_extent_generation(leaf, fi, 1);
btrfs_set_file_extent_type(leaf, fi, type);
@@ -63,7 +63,7 @@ static void insert_inode_item_key(struct btrfs_root *root)
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
setup_items_for_insert(root, &path, &key, &value_len, 1);
btrfs_setup_item_for_insert(root, &path, &key, value_len);
}
/*

View File

@@ -283,7 +283,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->trans_lock);
loop:
/* The file system has been taken offline. No new transactions. */
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
if (BTRFS_FS_ERROR(fs_info)) {
spin_unlock(&fs_info->trans_lock);
return -EROFS;
}
@@ -331,7 +331,7 @@ loop:
*/
kfree(cur_trans);
goto loop;
} else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
} else if (BTRFS_FS_ERROR(fs_info)) {
spin_unlock(&fs_info->trans_lock);
kfree(cur_trans);
return -EROFS;
@@ -579,7 +579,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
bool do_chunk_alloc = false;
int ret;
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
if (BTRFS_FS_ERROR(fs_info))
return ERR_PTR(-EROFS);
if (current->journal_info) {
@@ -991,8 +991,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (throttle)
btrfs_run_delayed_iputs(info);
if (TRANS_ABORTED(trans) ||
test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
if (TRANS_ABORTED(trans) || BTRFS_FS_ERROR(info)) {
wake_up_process(info->transaction_kthread);
if (TRANS_ABORTED(trans))
err = trans->aborted;
@@ -2155,7 +2154,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* abort to prevent writing a new superblock that reflects a
* corrupt state (pointing to trees with unwritten nodes/leafs).
*/
if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) {
if (BTRFS_FS_ERROR(fs_info)) {
ret = -EROFS;
goto cleanup_transaction;
}

File diff suppressed because it is too large Load Diff

View File

@@ -17,6 +17,8 @@ struct btrfs_log_ctx {
int log_transid;
bool log_new_dentries;
bool logging_new_name;
/* Tracks the last logged dir item/index key offset. */
u64 last_dir_item_offset;
struct inode *inode;
struct list_head list;
/* Only used for fast fsyncs. */
@@ -68,14 +70,14 @@ int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct dentry *dentry,
struct btrfs_log_ctx *ctx);
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
struct btrfs_inode *dir, u64 index);
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
struct btrfs_inode *inode, u64 dirid);
void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
struct btrfs_inode *dir, u64 index);
void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
struct btrfs_inode *inode, u64 dirid);
void btrfs_end_log_trans(struct btrfs_root *root);
void btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,

File diff suppressed because it is too large Load Diff

View File

@@ -236,17 +236,40 @@ struct btrfs_fs_devices {
bool fsid_change;
struct list_head fs_list;
/*
* Number of devices under this fsid including missing and
* replace-target device and excludes seed devices.
*/
u64 num_devices;
/*
* The number of devices that successfully opened, including
* replace-target, excludes seed devices.
*/
u64 open_devices;
/* The number of devices that are under the chunk allocation list. */
u64 rw_devices;
/* Count of missing devices under this fsid excluding seed device. */
u64 missing_devices;
u64 total_rw_bytes;
/*
* Count of devices from btrfs_super_block::num_devices for this fsid,
* which includes the seed device, excludes the transient replace-target
* device.
*/
u64 total_devices;
/* Highest generation number of seen devices */
u64 latest_generation;
struct block_device *latest_bdev;
/*
* The mount device or a device with highest generation after removal
* or replace.
*/
struct btrfs_device *latest_dev;
/* all of the devices in the FS, protected by a mutex
* so we can safely walk it to write out the supers without
@@ -300,48 +323,62 @@ struct btrfs_fs_devices {
/ sizeof(struct btrfs_stripe) + 1)
/*
* we need the mirror number and stripe index to be passed around
* the call chain while we are processing end_io (especially errors).
* Really, what we need is a btrfs_bio structure that has this info
* and is properly sized with its stripe array, but we're not there
* quite yet. We have our own btrfs bioset, and all of the bios
* we allocate are actually btrfs_io_bios. We'll cram as much of
* struct btrfs_bio as we can into this over time.
* Additional info to pass along bio.
*
* Mostly for btrfs specific features like csum and mirror_num.
*/
struct btrfs_io_bio {
struct btrfs_bio {
unsigned int mirror_num;
/* @device is for stripe IO submission. */
struct btrfs_device *device;
u64 logical;
u8 *csum;
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
struct bvec_iter iter;
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_io_bio but relies on bio being last.
* bytes for entire btrfs_bio but relies on bio being last.
*/
struct bio bio;
};
static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
{
return container_of(bio, struct btrfs_io_bio, bio);
return container_of(bio, struct btrfs_bio, bio);
}
static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio)
static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
{
if (io_bio->csum != io_bio->csum_inline) {
kfree(io_bio->csum);
io_bio->csum = NULL;
if (bbio->csum != bbio->csum_inline) {
kfree(bbio->csum);
bbio->csum = NULL;
}
}
struct btrfs_bio_stripe {
struct btrfs_io_stripe {
struct btrfs_device *dev;
u64 physical;
u64 length; /* only used for discard mappings */
};
struct btrfs_bio {
/*
* Context for IO subsmission for device stripe.
*
* - Track the unfinished mirrors for mirror based profiles
* Mirror based profiles are SINGLE/DUP/RAID1/RAID10.
*
* - Contain the logical -> physical mapping info
* Used by submit_stripe_bio() for mapping logical bio
* into physical device address.
*
* - Contain device replace info
* Used by handle_ops_on_dev_replace() to copy logical bios
* into the new device.
*
* - Contain RAID56 full stripe logical bytenrs
*/
struct btrfs_io_context {
refcount_t refs;
atomic_t stripes_pending;
struct btrfs_fs_info *fs_info;
@@ -361,7 +398,7 @@ struct btrfs_bio {
* so raid_map[0] is the start of our full stripe
*/
u64 *raid_map;
struct btrfs_bio_stripe stripes[];
struct btrfs_io_stripe stripes[];
};
struct btrfs_device_info {
@@ -396,11 +433,11 @@ struct map_lookup {
int num_stripes;
int sub_stripes;
int verified_stripes; /* For mount time dev extent verification */
struct btrfs_bio_stripe stripes[];
struct btrfs_io_stripe stripes[];
};
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
(sizeof(struct btrfs_io_stripe) * (n)))
struct btrfs_balance_args;
struct btrfs_balance_progress;
@@ -414,6 +451,22 @@ struct btrfs_balance_control {
struct btrfs_balance_progress stat;
};
/*
* Search for a given device by the set parameters
*/
struct btrfs_dev_lookup_args {
u64 devid;
u8 *uuid;
u8 *fsid;
bool missing;
};
/* We have to initialize to -1 because BTRFS_DEV_REPLACE_DEVID is 0 */
#define BTRFS_DEV_LOOKUP_ARGS_INIT { .devid = (u64)-1 }
#define BTRFS_DEV_LOOKUP_ARGS(name) \
struct btrfs_dev_lookup_args name = BTRFS_DEV_LOOKUP_ARGS_INIT
enum btrfs_map_op {
BTRFS_MAP_READ,
BTRFS_MAP_WRITE,
@@ -437,20 +490,20 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
}
}
void btrfs_get_bbio(struct btrfs_bio *bbio);
void btrfs_put_bbio(struct btrfs_bio *bbio);
void btrfs_get_bioc(struct btrfs_io_context *bioc);
void btrfs_put_bioc(struct btrfs_io_context *bioc);
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num);
struct btrfs_io_context **bioc_ret, int mirror_num);
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret);
struct btrfs_io_context **bioc_ret);
int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map,
enum btrfs_map_op op, u64 logical,
struct btrfs_io_geometry *io_geom);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
u64 type);
void btrfs_mapping_tree_free(struct extent_map_tree *tree);
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
@@ -467,19 +520,23 @@ void btrfs_assign_next_active_device(struct btrfs_device *device,
struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
u64 devid,
const char *devpath);
int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args,
const char *path);
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
const u8 *uuid);
void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
void btrfs_free_device(struct btrfs_device *device);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
const char *device_path, u64 devid,
struct btrfs_dev_lookup_args *args,
struct block_device **bdev, fmode_t *mode);
void __exit btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 new_size);
struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
u64 devid, u8 *uuid, u8 *fsid);
struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices,
const struct btrfs_dev_lookup_args *args);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
int btrfs_balance(struct btrfs_fs_info *fs_info,
@@ -493,7 +550,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_uuid_scan_kthread(void *data);
int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset);
bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);

View File

@@ -138,7 +138,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
* matches our target xattr, so lets check.
*/
ret = 0;
btrfs_assert_tree_locked(path->nodes[0]);
btrfs_assert_tree_write_locked(path->nodes[0]);
di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
if (!di && !(flags & XATTR_REPLACE)) {
ret = -ENOSPC;

View File

@@ -4,6 +4,7 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/sched/mm.h>
#include <linux/atomic.h>
#include "ctree.h"
#include "volumes.h"
#include "zoned.h"
@@ -38,6 +39,16 @@
/* Number of superblock log zones */
#define BTRFS_NR_SB_LOG_ZONES 2
/*
* Minimum of active zones we need:
*
* - BTRFS_SUPER_MIRROR_MAX zones for superblock mirrors
* - 3 zones to ensure at least one zone per SYSTEM, META and DATA block group
* - 1 zone for tree-log dedicated block group
* - 1 zone for relocation
*/
#define BTRFS_MIN_ACTIVE_ZONES (BTRFS_SUPER_MIRROR_MAX + 5)
/*
* Maximum supported zone size. Currently, SMR disks have a zone size of
* 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. We do not
@@ -45,6 +56,14 @@
*/
#define BTRFS_MAX_ZONE_SIZE SZ_8G
#define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)
static inline bool sb_zone_is_full(const struct blk_zone *zone)
{
return (zone->cond == BLK_ZONE_COND_FULL) ||
(zone->wp + SUPER_INFO_SECTORS > zone->start + zone->capacity);
}
static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data)
{
struct blk_zone *zones = data;
@@ -60,14 +79,13 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
bool empty[BTRFS_NR_SB_LOG_ZONES];
bool full[BTRFS_NR_SB_LOG_ZONES];
sector_t sector;
int i;
ASSERT(zones[0].type != BLK_ZONE_TYPE_CONVENTIONAL &&
zones[1].type != BLK_ZONE_TYPE_CONVENTIONAL);
empty[0] = (zones[0].cond == BLK_ZONE_COND_EMPTY);
empty[1] = (zones[1].cond == BLK_ZONE_COND_EMPTY);
full[0] = (zones[0].cond == BLK_ZONE_COND_FULL);
full[1] = (zones[1].cond == BLK_ZONE_COND_FULL);
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL);
empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY);
full[i] = sb_zone_is_full(&zones[i]);
}
/*
* Possible states of log buffer zones
@@ -296,6 +314,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_zoned_device_info *zone_info = NULL;
struct block_device *bdev = device->bdev;
struct request_queue *queue = bdev_get_queue(bdev);
unsigned int max_active_zones;
unsigned int nactive;
sector_t nr_sectors;
sector_t sector = 0;
struct blk_zone *zones = NULL;
@@ -351,6 +372,17 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
max_active_zones = queue_max_active_zones(queue);
if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
btrfs_err_in_rcu(fs_info,
"zoned: %s: max active zones %u is too small, need at least %u active zones",
rcu_str_deref(device->name), max_active_zones,
BTRFS_MIN_ACTIVE_ZONES);
ret = -EINVAL;
goto out;
}
zone_info->max_active_zones = max_active_zones;
zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
if (!zone_info->seq_zones) {
ret = -ENOMEM;
@@ -363,6 +395,12 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
goto out;
}
zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
if (!zone_info->active_zones) {
ret = -ENOMEM;
goto out;
}
zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
if (!zones) {
ret = -ENOMEM;
@@ -370,6 +408,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
}
/* Get zones type */
nactive = 0;
while (sector < nr_sectors) {
nr_zones = BTRFS_REPORT_NR_ZONES;
ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
@@ -380,8 +419,17 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
for (i = 0; i < nr_zones; i++) {
if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
__set_bit(nreported, zone_info->seq_zones);
if (zones[i].cond == BLK_ZONE_COND_EMPTY)
switch (zones[i].cond) {
case BLK_ZONE_COND_EMPTY:
__set_bit(nreported, zone_info->empty_zones);
break;
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
case BLK_ZONE_COND_CLOSED:
__set_bit(nreported, zone_info->active_zones);
nactive++;
break;
}
nreported++;
}
sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
@@ -396,6 +444,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
goto out;
}
if (max_active_zones) {
if (nactive > max_active_zones) {
btrfs_err_in_rcu(device->fs_info,
"zoned: %u active zones on %s exceeds max_active_zones %u",
nactive, rcu_str_deref(device->name),
max_active_zones);
ret = -EIO;
goto out;
}
atomic_set(&zone_info->active_zones_left,
max_active_zones - nactive);
}
/* Validate superblock log */
nr_zones = BTRFS_NR_SB_LOG_ZONES;
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -478,6 +539,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
out:
kfree(zones);
out_free_zone_info:
bitmap_free(zone_info->active_zones);
bitmap_free(zone_info->empty_zones);
bitmap_free(zone_info->seq_zones);
kfree(zone_info);
@@ -493,6 +555,7 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
if (!zone_info)
return;
bitmap_free(zone_info->active_zones);
bitmap_free(zone_info->seq_zones);
bitmap_free(zone_info->empty_zones);
kfree(zone_info);
@@ -585,7 +648,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
/*
* stripe_size is always aligned to BTRFS_STRIPE_LEN in
* __btrfs_alloc_chunk(). Since we want stripe_len == zone_size,
* btrfs_create_chunk(). Since we want stripe_len == zone_size,
* check the alignment here.
*/
if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) {
@@ -664,7 +727,7 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
reset = &zones[1];
if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
ASSERT(reset->cond == BLK_ZONE_COND_FULL);
ASSERT(sb_zone_is_full(reset));
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
reset->start, reset->len,
@@ -676,9 +739,20 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
reset->wp = reset->start;
}
} else if (ret != -ENOENT) {
/* For READ, we want the precious one */
/*
* For READ, we want the previous one. Move write pointer to
* the end of a zone, if it is at the head of a zone.
*/
u64 zone_end = 0;
if (wp == zones[0].start << SECTOR_SHIFT)
wp = (zones[1].start + zones[1].len) << SECTOR_SHIFT;
zone_end = zones[1].start + zones[1].capacity;
else if (wp == zones[1].start << SECTOR_SHIFT)
zone_end = zones[0].start + zones[0].capacity;
if (zone_end)
wp = ALIGN_DOWN(zone_end << SECTOR_SHIFT,
BTRFS_SUPER_INFO_SIZE);
wp -= BTRFS_SUPER_INFO_SIZE;
}
@@ -771,36 +845,56 @@ static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo,
return true;
}
void btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
{
struct btrfs_zoned_device_info *zinfo = device->zone_info;
struct blk_zone *zone;
int i;
if (!is_sb_log_zone(zinfo, mirror))
return;
return 0;
zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror];
if (zone->cond != BLK_ZONE_COND_FULL) {
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
/* Advance the next zone */
if (zone->cond == BLK_ZONE_COND_FULL) {
zone++;
continue;
}
if (zone->cond == BLK_ZONE_COND_EMPTY)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);
zone->wp += SUPER_INFO_SECTORS;
if (zone->wp == zone->start + zone->len)
if (sb_zone_is_full(zone)) {
/*
* No room left to write new superblock. Since
* superblock is written with REQ_SYNC, it is safe to
* finish the zone now.
*
* If the write pointer is exactly at the capacity,
* explicit ZONE_FINISH is not necessary.
*/
if (zone->wp != zone->start + zone->capacity) {
int ret;
ret = blkdev_zone_mgmt(device->bdev,
REQ_OP_ZONE_FINISH, zone->start,
zone->len, GFP_NOFS);
if (ret)
return ret;
}
zone->wp = zone->start + zone->len;
zone->cond = BLK_ZONE_COND_FULL;
return;
}
return 0;
}
zone++;
ASSERT(zone->cond != BLK_ZONE_COND_FULL);
if (zone->cond == BLK_ZONE_COND_EMPTY)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);
if (zone->wp == zone->start + zone->len)
zone->cond = BLK_ZONE_COND_FULL;
/* All the zones are FULL. Should not reach here. */
ASSERT(0);
return -EIO;
}
int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
@@ -895,6 +989,41 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
return pos;
}
static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos)
{
struct btrfs_zoned_device_info *zone_info = device->zone_info;
unsigned int zno = (pos >> zone_info->zone_size_shift);
/* We can use any number of zones */
if (zone_info->max_active_zones == 0)
return true;
if (!test_bit(zno, zone_info->active_zones)) {
/* Active zone left? */
if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0)
return false;
if (test_and_set_bit(zno, zone_info->active_zones)) {
/* Someone already set the bit */
atomic_inc(&zone_info->active_zones_left);
}
}
return true;
}
static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos)
{
struct btrfs_zoned_device_info *zone_info = device->zone_info;
unsigned int zno = (pos >> zone_info->zone_size_shift);
/* We can use any number of zones */
if (zone_info->max_active_zones == 0)
return;
if (test_and_clear_bit(zno, zone_info->active_zones))
atomic_inc(&zone_info->active_zones_left);
}
int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
u64 length, u64 *bytes)
{
@@ -910,6 +1039,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
*bytes = length;
while (length) {
btrfs_dev_set_zone_empty(device, physical);
btrfs_dev_clear_active_zone(device, physical);
physical += device->zone_info->zone_size;
length -= device->zone_info->zone_size;
}
@@ -1039,6 +1169,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
int i;
unsigned int nofs_flag;
u64 *alloc_offsets = NULL;
u64 *caps = NULL;
unsigned long *active = NULL;
u64 last_alloc = 0;
u32 num_sequential = 0, num_conventional = 0;
@@ -1063,10 +1195,28 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
map = em->map_lookup;
cache->physical_map = kmemdup(map, map_lookup_size(map->num_stripes), GFP_NOFS);
if (!cache->physical_map) {
ret = -ENOMEM;
goto out;
}
alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS);
if (!alloc_offsets) {
free_extent_map(em);
return -ENOMEM;
ret = -ENOMEM;
goto out;
}
caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS);
if (!caps) {
ret = -ENOMEM;
goto out;
}
active = bitmap_zalloc(map->num_stripes, GFP_NOFS);
if (!active) {
ret = -ENOMEM;
goto out;
}
for (i = 0; i < map->num_stripes; i++) {
@@ -1131,6 +1281,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
caps[i] = (zone.capacity << SECTOR_SHIFT);
switch (zone.cond) {
case BLK_ZONE_COND_OFFLINE:
case BLK_ZONE_COND_READONLY:
@@ -1144,14 +1296,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
alloc_offsets[i] = 0;
break;
case BLK_ZONE_COND_FULL:
alloc_offsets[i] = fs_info->zone_size;
alloc_offsets[i] = caps[i];
break;
default:
/* Partially used zone */
alloc_offsets[i] =
((zone.wp - zone.start) << SECTOR_SHIFT);
__set_bit(i, active);
break;
}
/*
* Consider a zone as active if we can allow any number of
* active zones.
*/
if (!device->zone_info->max_active_zones)
__set_bit(i, active);
}
if (num_sequential > 0)
@@ -1169,6 +1329,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
* calculate_alloc_pointer() which takes extent buffer
* locks to avoid deadlock.
*/
/* Zone capacity is always zone size in emulation */
cache->zone_capacity = cache->length;
if (new) {
cache->alloc_offset = 0;
goto out;
@@ -1195,6 +1358,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
cache->alloc_offset = alloc_offsets[0];
cache->zone_capacity = caps[0];
cache->zone_is_active = test_bit(0, active);
break;
case BTRFS_BLOCK_GROUP_DUP:
case BTRFS_BLOCK_GROUP_RAID1:
@@ -1210,6 +1375,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
if (cache->zone_is_active) {
btrfs_get_block_group(cache);
spin_lock(&fs_info->zone_active_bgs_lock);
list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
spin_unlock(&fs_info->zone_active_bgs_lock);
}
out:
if (cache->alloc_offset > fs_info->zone_size) {
btrfs_err(fs_info,
@@ -1218,6 +1390,14 @@ out:
ret = -EIO;
}
if (cache->alloc_offset > cache->zone_capacity) {
btrfs_err(fs_info,
"zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
cache->alloc_offset, cache->zone_capacity,
cache->start);
ret = -EIO;
}
/* An extent is allocated after the write pointer */
if (!ret && num_conventional && last_alloc > cache->alloc_offset) {
btrfs_err(fs_info,
@@ -1229,6 +1409,12 @@ out:
if (!ret)
cache->meta_write_pointer = cache->alloc_offset + cache->start;
if (ret) {
kfree(cache->physical_map);
cache->physical_map = NULL;
}
bitmap_free(active);
kfree(caps);
kfree(alloc_offsets);
free_extent_map(em);
@@ -1243,17 +1429,15 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
return;
WARN_ON(cache->bytes_super != 0);
unusable = cache->alloc_offset - cache->used;
free = cache->length - cache->alloc_offset;
unusable = (cache->alloc_offset - cache->used) +
(cache->length - cache->zone_capacity);
free = cache->zone_capacity - cache->alloc_offset;
/* We only need ->free_space in ALLOC_SEQ block groups */
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
cache->free_space_ctl->free_space = free;
cache->zone_unusable = unusable;
/* Should not have any excluded extents. Just in case, though */
btrfs_free_excluded_extents(cache);
}
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
@@ -1304,6 +1488,17 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
if (!is_data_inode(&inode->vfs_inode))
return false;
/*
* Using REQ_OP_ZONE_APPNED for relocation can break assumptions on the
* extent layout the relocation code has.
* Furthermore we have set aside own block-group from which only the
* relocation "process" can allocate and make sure only one process at a
* time can add pages to an extent that gets relocated, so it's safe to
* use regular REQ_OP_WRITE for this special case.
*/
if (btrfs_is_data_reloc_root(inode->root))
return false;
cache = btrfs_lookup_block_group(fs_info, start);
ASSERT(cache);
if (!cache)
@@ -1440,27 +1635,27 @@ int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 len
static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
struct blk_zone *zone)
{
struct btrfs_bio *bbio = NULL;
struct btrfs_io_context *bioc = NULL;
u64 mapped_length = PAGE_SIZE;
unsigned int nofs_flag;
int nmirrors;
int i, ret;
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&mapped_length, &bbio);
if (ret || !bbio || mapped_length < PAGE_SIZE) {
btrfs_put_bbio(bbio);
&mapped_length, &bioc);
if (ret || !bioc || mapped_length < PAGE_SIZE) {
btrfs_put_bioc(bioc);
return -EIO;
}
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
return -EINVAL;
nofs_flag = memalloc_nofs_save();
nmirrors = (int)bbio->num_stripes;
nmirrors = (int)bioc->num_stripes;
for (i = 0; i < nmirrors; i++) {
u64 physical = bbio->stripes[i].physical;
struct btrfs_device *dev = bbio->stripes[i].dev;
u64 physical = bioc->stripes[i].physical;
struct btrfs_device *dev = bioc->stripes[i].dev;
/* Missing device */
if (!dev->bdev)
@@ -1530,3 +1725,251 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
return device;
}
/**
* Activate block group and underlying device zones
*
* @block_group: the block group to activate
*
* Return: true on success, false otherwise
*/
bool btrfs_zone_activate(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct map_lookup *map;
struct btrfs_device *device;
u64 physical;
bool ret;
if (!btrfs_is_zoned(block_group->fs_info))
return true;
map = block_group->physical_map;
/* Currently support SINGLE profile only */
ASSERT(map->num_stripes == 1);
device = map->stripes[0].dev;
physical = map->stripes[0].physical;
if (device->zone_info->max_active_zones == 0)
return true;
spin_lock(&block_group->lock);
if (block_group->zone_is_active) {
ret = true;
goto out_unlock;
}
/* No space left */
if (block_group->alloc_offset == block_group->zone_capacity) {
ret = false;
goto out_unlock;
}
if (!btrfs_dev_set_active_zone(device, physical)) {
/* Cannot activate the zone */
ret = false;
goto out_unlock;
}
/* Successfully activated all the zones */
block_group->zone_is_active = 1;
spin_unlock(&block_group->lock);
/* For the active block group list */
btrfs_get_block_group(block_group);
spin_lock(&fs_info->zone_active_bgs_lock);
ASSERT(list_empty(&block_group->active_bg_list));
list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
spin_unlock(&fs_info->zone_active_bgs_lock);
return true;
out_unlock:
spin_unlock(&block_group->lock);
return ret;
}
int btrfs_zone_finish(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct map_lookup *map;
struct btrfs_device *device;
u64 physical;
int ret = 0;
if (!btrfs_is_zoned(fs_info))
return 0;
map = block_group->physical_map;
/* Currently support SINGLE profile only */
ASSERT(map->num_stripes == 1);
device = map->stripes[0].dev;
physical = map->stripes[0].physical;
if (device->zone_info->max_active_zones == 0)
return 0;
spin_lock(&block_group->lock);
if (!block_group->zone_is_active) {
spin_unlock(&block_group->lock);
return 0;
}
/* Check if we have unwritten allocated space */
if ((block_group->flags &
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
block_group->alloc_offset > block_group->meta_write_pointer) {
spin_unlock(&block_group->lock);
return -EAGAIN;
}
spin_unlock(&block_group->lock);
ret = btrfs_inc_block_group_ro(block_group, false);
if (ret)
return ret;
/* Ensure all writes in this block group finish */
btrfs_wait_block_group_reservations(block_group);
/* No need to wait for NOCOW writers. Zoned mode does not allow that. */
btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
block_group->length);
spin_lock(&block_group->lock);
/*
* Bail out if someone already deactivated the block group, or
* allocated space is left in the block group.
*/
if (!block_group->zone_is_active) {
spin_unlock(&block_group->lock);
btrfs_dec_block_group_ro(block_group);
return 0;
}
if (block_group->reserved) {
spin_unlock(&block_group->lock);
btrfs_dec_block_group_ro(block_group);
return -EAGAIN;
}
block_group->zone_is_active = 0;
block_group->alloc_offset = block_group->zone_capacity;
block_group->free_space_ctl->free_space = 0;
btrfs_clear_treelog_bg(block_group);
spin_unlock(&block_group->lock);
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
physical >> SECTOR_SHIFT,
device->zone_info->zone_size >> SECTOR_SHIFT,
GFP_NOFS);
btrfs_dec_block_group_ro(block_group);
if (!ret) {
btrfs_dev_clear_active_zone(device, physical);
spin_lock(&fs_info->zone_active_bgs_lock);
ASSERT(!list_empty(&block_group->active_bg_list));
list_del_init(&block_group->active_bg_list);
spin_unlock(&fs_info->zone_active_bgs_lock);
/* For active_bg_list */
btrfs_put_block_group(block_group);
}
return ret;
}
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, int raid_index)
{
struct btrfs_device *device;
bool ret = false;
if (!btrfs_is_zoned(fs_devices->fs_info))
return true;
/* Non-single profiles are not supported yet */
if (raid_index != BTRFS_RAID_SINGLE)
return false;
/* Check if there is a device with active zones left */
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) {
struct btrfs_zoned_device_info *zinfo = device->zone_info;
if (!device->bdev)
continue;
if (!zinfo->max_active_zones ||
atomic_read(&zinfo->active_zones_left)) {
ret = true;
break;
}
}
mutex_unlock(&fs_devices->device_list_mutex);
return ret;
}
void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
{
struct btrfs_block_group *block_group;
struct map_lookup *map;
struct btrfs_device *device;
u64 physical;
if (!btrfs_is_zoned(fs_info))
return;
block_group = btrfs_lookup_block_group(fs_info, logical);
ASSERT(block_group);
if (logical + length < block_group->start + block_group->zone_capacity)
goto out;
spin_lock(&block_group->lock);
if (!block_group->zone_is_active) {
spin_unlock(&block_group->lock);
goto out;
}
block_group->zone_is_active = 0;
/* We should have consumed all the free space */
ASSERT(block_group->alloc_offset == block_group->zone_capacity);
ASSERT(block_group->free_space_ctl->free_space == 0);
btrfs_clear_treelog_bg(block_group);
spin_unlock(&block_group->lock);
map = block_group->physical_map;
device = map->stripes[0].dev;
physical = map->stripes[0].physical;
if (!device->zone_info->max_active_zones)
goto out;
btrfs_dev_clear_active_zone(device, physical);
spin_lock(&fs_info->zone_active_bgs_lock);
ASSERT(!list_empty(&block_group->active_bg_list));
list_del_init(&block_group->active_bg_list);
spin_unlock(&fs_info->zone_active_bgs_lock);
btrfs_put_block_group(block_group);
out:
btrfs_put_block_group(block_group);
}
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
spin_lock(&fs_info->relocation_bg_lock);
if (fs_info->data_reloc_bg == bg->start)
fs_info->data_reloc_bg = 0;
spin_unlock(&fs_info->relocation_bg_lock);
}

View File

@@ -23,8 +23,11 @@ struct btrfs_zoned_device_info {
u64 zone_size;
u8 zone_size_shift;
u32 nr_zones;
unsigned int max_active_zones;
atomic_t active_zones_left;
unsigned long *seq_zones;
unsigned long *empty_zones;
unsigned long *active_zones;
struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX];
};
@@ -40,7 +43,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
u64 *bytenr_ret);
int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
u64 *bytenr_ret);
void btrfs_advance_sb_log(struct btrfs_device *device, int mirror);
int btrfs_advance_sb_log(struct btrfs_device *device, int mirror);
int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror);
u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
u64 hole_end, u64 num_bytes);
@@ -66,6 +69,13 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
u64 physical_start, u64 physical_pos);
struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
u64 logical, u64 length);
bool btrfs_zone_activate(struct btrfs_block_group *block_group);
int btrfs_zone_finish(struct btrfs_block_group *block_group);
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
int raid_index);
void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
u64 length);
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
@@ -113,8 +123,10 @@ static inline int btrfs_sb_log_location(struct btrfs_device *device, int mirror,
return 0;
}
static inline void btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
{ }
static inline int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
{
return 0;
}
static inline int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
{
@@ -199,6 +211,27 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
return ERR_PTR(-EOPNOTSUPP);
}
static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group)
{
return true;
}
static inline int btrfs_zone_finish(struct btrfs_block_group *block_group)
{
return 0;
}
static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
int raid_index)
{
return true;
}
static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
u64 logical, u64 length) { }
static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
#endif
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)

View File

@@ -1782,12 +1782,13 @@ EXPORT_SYMBOL(generic_update_time);
* This does the actual work of updating an inodes time or version. Must have
* had called mnt_want_write() before calling this.
*/
static int update_time(struct inode *inode, struct timespec64 *time, int flags)
int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
{
if (inode->i_op->update_time)
return inode->i_op->update_time(inode, time, flags);
return generic_update_time(inode, time, flags);
}
EXPORT_SYMBOL(inode_update_time);
/**
* atime_needs_update - update the access time
@@ -1857,7 +1858,7 @@ void touch_atime(const struct path *path)
* of the fs read only, e.g. subvolumes in Btrfs.
*/
now = current_time(inode);
update_time(inode, &now, S_ATIME);
inode_update_time(inode, &now, S_ATIME);
__mnt_drop_write(mnt);
skip_update:
sb_end_write(inode->i_sb);
@@ -2002,7 +2003,7 @@ int file_update_time(struct file *file)
if (__mnt_want_write_file(file))
return 0;
ret = update_time(inode, &now, sync_it);
ret = inode_update_time(inode, &now, sync_it);
__mnt_drop_write_file(file);
return ret;

View File

@@ -2496,6 +2496,8 @@ enum file_time_flags {
extern bool atime_needs_update(const struct path *, struct inode *);
extern void touch_atime(const struct path *);
int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
static inline void file_accessed(struct file *file)
{
if (!(file->f_flags & O_NOATIME))

View File

@@ -771,10 +771,16 @@ struct btrfs_ioctl_received_subvol_args {
*/
#define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4
/*
* Read the protocol version in the structure
*/
#define BTRFS_SEND_FLAG_VERSION 0x8
#define BTRFS_SEND_FLAG_MASK \
(BTRFS_SEND_FLAG_NO_FILE_DATA | \
BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
BTRFS_SEND_FLAG_OMIT_END_CMD)
BTRFS_SEND_FLAG_OMIT_END_CMD | \
BTRFS_SEND_FLAG_VERSION)
struct btrfs_ioctl_send_args {
__s64 send_fd; /* in */
@@ -782,7 +788,8 @@ struct btrfs_ioctl_send_args {
__u64 __user *clone_sources; /* in */
__u64 parent_root; /* in */
__u64 flags; /* in */
__u64 reserved[4]; /* in */
__u32 version; /* in */
__u8 reserved[28]; /* in */
};
/*