mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-07 19:30:30 +09:00
Merge 037c50bfbe ("Merge tag 'for-5.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux") into android-mainline
Steps on the way to 5.16-rc1 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: Ide84fd0886d433adb3c6d279c1ddaf4816055a3c
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/list_sort.h>
|
||||
#include "misc.h"
|
||||
#include "ctree.h"
|
||||
#include "block-group.h"
|
||||
@@ -144,6 +145,7 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
|
||||
*/
|
||||
WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
|
||||
kfree(cache->free_space_ctl);
|
||||
kfree(cache->physical_map);
|
||||
kfree(cache);
|
||||
}
|
||||
}
|
||||
@@ -902,6 +904,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
spin_unlock(&cluster->refill_lock);
|
||||
|
||||
btrfs_clear_treelog_bg(block_group);
|
||||
btrfs_clear_data_reloc_bg(block_group);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
@@ -1484,6 +1487,21 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* We want block groups with a low number of used bytes to be in the beginning
|
||||
* of the list, so they will get reclaimed first.
|
||||
*/
|
||||
static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
|
||||
const struct list_head *b)
|
||||
{
|
||||
const struct btrfs_block_group *bg1, *bg2;
|
||||
|
||||
bg1 = list_entry(a, struct btrfs_block_group, bg_list);
|
||||
bg2 = list_entry(b, struct btrfs_block_group, bg_list);
|
||||
|
||||
return bg1->used > bg2->used;
|
||||
}
|
||||
|
||||
void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info =
|
||||
@@ -1508,6 +1526,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
}
|
||||
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
/*
|
||||
* Sort happens under lock because we can't simply splice it and sort.
|
||||
* The block groups might still be in use and reachable via bg_list,
|
||||
* and their presence in the reclaim_bgs list must be preserved.
|
||||
*/
|
||||
list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
|
||||
while (!list_empty(&fs_info->reclaim_bgs)) {
|
||||
u64 zone_unusable;
|
||||
int ret = 0;
|
||||
@@ -1895,6 +1919,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
|
||||
INIT_LIST_HEAD(&cache->discard_list);
|
||||
INIT_LIST_HEAD(&cache->dirty_list);
|
||||
INIT_LIST_HEAD(&cache->io_list);
|
||||
INIT_LIST_HEAD(&cache->active_bg_list);
|
||||
btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
|
||||
atomic_set(&cache->frozen, 0);
|
||||
mutex_init(&cache->free_space_lock);
|
||||
@@ -2035,6 +2060,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
*/
|
||||
if (btrfs_is_zoned(info)) {
|
||||
btrfs_calc_zone_unusable(cache);
|
||||
/* Should not have any excluded extents. Just in case, though. */
|
||||
btrfs_free_excluded_extents(cache);
|
||||
} else if (cache->length == cache->used) {
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
@@ -2062,15 +2089,18 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
link_block_group(cache);
|
||||
|
||||
set_avail_alloc_bits(info, cache->flags);
|
||||
if (btrfs_chunk_readonly(info, cache->start)) {
|
||||
if (btrfs_chunk_writeable(info, cache->start)) {
|
||||
if (cache->used == 0) {
|
||||
ASSERT(list_empty(&cache->bg_list));
|
||||
if (btrfs_test_opt(info, DISCARD_ASYNC))
|
||||
btrfs_discard_queue_work(&info->discard_ctl, cache);
|
||||
else
|
||||
btrfs_mark_bg_unused(cache);
|
||||
}
|
||||
} else {
|
||||
inc_block_group_ro(cache, 1);
|
||||
} else if (cache->used == 0) {
|
||||
ASSERT(list_empty(&cache->bg_list));
|
||||
if (btrfs_test_opt(info, DISCARD_ASYNC))
|
||||
btrfs_discard_queue_work(&info->discard_ctl, cache);
|
||||
else
|
||||
btrfs_mark_bg_unused(cache);
|
||||
}
|
||||
|
||||
return 0;
|
||||
error:
|
||||
btrfs_put_block_group(cache);
|
||||
@@ -2438,6 +2468,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* New block group is likely to be used soon. Try to activate it now.
|
||||
* Failure is OK for now.
|
||||
*/
|
||||
btrfs_zone_activate(cache);
|
||||
|
||||
ret = exclude_super_stripes(cache);
|
||||
if (ret) {
|
||||
/* We may have excluded something, so call this just in case */
|
||||
@@ -2479,7 +2515,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
*/
|
||||
trace_btrfs_add_block_group(fs_info, cache, 1);
|
||||
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
|
||||
cache->bytes_super, 0, &cache->space_info);
|
||||
cache->bytes_super, cache->zone_unusable,
|
||||
&cache->space_info);
|
||||
btrfs_update_global_block_rsv(fs_info);
|
||||
|
||||
link_block_group(cache);
|
||||
@@ -2594,7 +2631,9 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
|
||||
if (!--cache->ro) {
|
||||
if (btrfs_is_zoned(cache->fs_info)) {
|
||||
/* Migrate zone_unusable bytes back */
|
||||
cache->zone_unusable = cache->alloc_offset - cache->used;
|
||||
cache->zone_unusable =
|
||||
(cache->alloc_offset - cache->used) +
|
||||
(cache->length - cache->zone_capacity);
|
||||
sinfo->bytes_zone_unusable += cache->zone_unusable;
|
||||
sinfo->bytes_readonly -= cache->zone_unusable;
|
||||
}
|
||||
@@ -3143,7 +3182,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
|
||||
}
|
||||
|
||||
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, int alloc)
|
||||
u64 bytenr, u64 num_bytes, bool alloc)
|
||||
{
|
||||
struct btrfs_fs_info *info = trans->fs_info;
|
||||
struct btrfs_block_group *cache = NULL;
|
||||
@@ -3380,36 +3419,17 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
|
||||
*/
|
||||
check_system_chunk(trans, flags);
|
||||
|
||||
bg = btrfs_alloc_chunk(trans, flags);
|
||||
bg = btrfs_create_chunk(trans, flags);
|
||||
if (IS_ERR(bg)) {
|
||||
ret = PTR_ERR(bg);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a system chunk allocation then stop right here and do not
|
||||
* add the chunk item to the chunk btree. This is to prevent a deadlock
|
||||
* because this system chunk allocation can be triggered while COWing
|
||||
* some extent buffer of the chunk btree and while holding a lock on a
|
||||
* parent extent buffer, in which case attempting to insert the chunk
|
||||
* item (or update the device item) would result in a deadlock on that
|
||||
* parent extent buffer. In this case defer the chunk btree updates to
|
||||
* the second phase of chunk allocation and keep our reservation until
|
||||
* the second phase completes.
|
||||
*
|
||||
* This is a rare case and can only be triggered by the very few cases
|
||||
* we have where we need to touch the chunk btree outside chunk allocation
|
||||
* and chunk removal. These cases are basically adding a device, removing
|
||||
* a device or resizing a device.
|
||||
*/
|
||||
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
return 0;
|
||||
|
||||
ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
|
||||
/*
|
||||
* Normally we are not expected to fail with -ENOSPC here, since we have
|
||||
* previously reserved space in the system space_info and allocated one
|
||||
* new system chunk if necessary. However there are two exceptions:
|
||||
* new system chunk if necessary. However there are three exceptions:
|
||||
*
|
||||
* 1) We may have enough free space in the system space_info but all the
|
||||
* existing system block groups have a profile which can not be used
|
||||
@@ -3435,13 +3455,20 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
|
||||
* with enough free space got turned into RO mode by a running scrub,
|
||||
* and in this case we have to allocate a new one and retry. We only
|
||||
* need do this allocate and retry once, since we have a transaction
|
||||
* handle and scrub uses the commit root to search for block groups.
|
||||
* handle and scrub uses the commit root to search for block groups;
|
||||
*
|
||||
* 3) We had one system block group with enough free space when we called
|
||||
* check_system_chunk(), but after that, right before we tried to
|
||||
* allocate the last extent buffer we needed, a discard operation came
|
||||
* in and it temporarily removed the last free space entry from the
|
||||
* block group (discard removes a free space entry, discards it, and
|
||||
* then adds back the entry to the block group cache).
|
||||
*/
|
||||
if (ret == -ENOSPC) {
|
||||
const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info);
|
||||
struct btrfs_block_group *sys_bg;
|
||||
|
||||
sys_bg = btrfs_alloc_chunk(trans, sys_flags);
|
||||
sys_bg = btrfs_create_chunk(trans, sys_flags);
|
||||
if (IS_ERR(sys_bg)) {
|
||||
ret = PTR_ERR(sys_bg);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@@ -3519,7 +3546,15 @@ out:
|
||||
* properly, either intentionally or as a bug. One example where this is
|
||||
* done intentionally is fsync, as it does not reserve any transaction units
|
||||
* and ends up allocating a variable number of metadata extents for log
|
||||
* tree extent buffers.
|
||||
* tree extent buffers;
|
||||
*
|
||||
* 4) The task has reserved enough transaction units / metadata space, but right
|
||||
* before it tries to allocate the last extent buffer it needs, a discard
|
||||
* operation comes in and, temporarily, removes the last free space entry from
|
||||
* the only metadata block group that had free space (discard starts by
|
||||
* removing a free space entry from a block group, then does the discard
|
||||
* operation and, once it's done, it adds back the free space entry to the
|
||||
* block group).
|
||||
*
|
||||
* We also need this 2 phases setup when adding a device to a filesystem with
|
||||
* a seed device - we must create new metadata and system chunks without adding
|
||||
@@ -3537,14 +3572,14 @@ out:
|
||||
* This has happened before and commit eafa4fd0ad0607 ("btrfs: fix exhaustion of
|
||||
* the system chunk array due to concurrent allocations") provides more details.
|
||||
*
|
||||
* For allocation of system chunks, we defer the updates and insertions into the
|
||||
* chunk btree to phase 2. This is to prevent deadlocks on extent buffers because
|
||||
* if the chunk allocation is triggered while COWing an extent buffer of the
|
||||
* chunk btree, we are holding a lock on the parent of that extent buffer and
|
||||
* doing the chunk btree updates and insertions can require locking that parent.
|
||||
* This is for the very few and rare cases where we update the chunk btree that
|
||||
* are not chunk allocation or chunk removal: adding a device, removing a device
|
||||
* or resizing a device.
|
||||
* Allocation of system chunks does not happen through this function. A task that
|
||||
* needs to update the chunk btree (the only btree that uses system chunks), must
|
||||
* preallocate chunk space by calling either check_system_chunk() or
|
||||
* btrfs_reserve_chunk_metadata() - the former is used when allocating a data or
|
||||
* metadata chunk or when removing a chunk, while the latter is used before doing
|
||||
* a modification to the chunk btree - use cases for the latter are adding,
|
||||
* removing and resizing a device as well as relocation of a system chunk.
|
||||
* See the comment below for more details.
|
||||
*
|
||||
* The reservation of system space, done through check_system_chunk(), as well
|
||||
* as all the updates and insertions into the chunk btree must be done while
|
||||
@@ -3581,11 +3616,27 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
|
||||
if (trans->allocating_chunk)
|
||||
return -ENOSPC;
|
||||
/*
|
||||
* If we are removing a chunk, don't re-enter or we would deadlock.
|
||||
* System space reservation and system chunk allocation is done by the
|
||||
* chunk remove operation (btrfs_remove_chunk()).
|
||||
* Allocation of system chunks can not happen through this path, as we
|
||||
* could end up in a deadlock if we are allocating a data or metadata
|
||||
* chunk and there is another task modifying the chunk btree.
|
||||
*
|
||||
* This is because while we are holding the chunk mutex, we will attempt
|
||||
* to add the new chunk item to the chunk btree or update an existing
|
||||
* device item in the chunk btree, while the other task that is modifying
|
||||
* the chunk btree is attempting to COW an extent buffer while holding a
|
||||
* lock on it and on its parent - if the COW operation triggers a system
|
||||
* chunk allocation, then we can deadlock because we are holding the
|
||||
* chunk mutex and we may need to access that extent buffer or its parent
|
||||
* in order to add the chunk item or update a device item.
|
||||
*
|
||||
* Tasks that want to modify the chunk tree should reserve system space
|
||||
* before updating the chunk btree, by calling either
|
||||
* btrfs_reserve_chunk_metadata() or check_system_chunk().
|
||||
* It's possible that after a task reserves the space, it still ends up
|
||||
* here - this happens in the cases described above at do_chunk_alloc().
|
||||
* The task will have to either retry or fail.
|
||||
*/
|
||||
if (trans->removing_chunk)
|
||||
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
return -ENOSPC;
|
||||
|
||||
space_info = btrfs_find_space_info(fs_info, flags);
|
||||
@@ -3684,17 +3735,14 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
|
||||
return num_dev;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reserve space in the system space for allocating or removing a chunk
|
||||
*/
|
||||
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
|
||||
static void reserve_chunk_space(struct btrfs_trans_handle *trans,
|
||||
u64 bytes,
|
||||
u64 type)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_space_info *info;
|
||||
u64 left;
|
||||
u64 thresh;
|
||||
int ret = 0;
|
||||
u64 num_devs;
|
||||
|
||||
/*
|
||||
* Needed because we can end up allocating a system chunk and for an
|
||||
@@ -3707,19 +3755,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
|
||||
left = info->total_bytes - btrfs_space_info_used(info, true);
|
||||
spin_unlock(&info->lock);
|
||||
|
||||
num_devs = get_profile_num_devs(fs_info, type);
|
||||
|
||||
/* num_devs device items to update and 1 chunk item to add or remove */
|
||||
thresh = btrfs_calc_metadata_size(fs_info, num_devs) +
|
||||
btrfs_calc_insert_metadata_size(fs_info, 1);
|
||||
|
||||
if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
|
||||
if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
|
||||
btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
|
||||
left, thresh, type);
|
||||
left, bytes, type);
|
||||
btrfs_dump_space_info(fs_info, info, 0, 0);
|
||||
}
|
||||
|
||||
if (left < thresh) {
|
||||
if (left < bytes) {
|
||||
u64 flags = btrfs_system_alloc_profile(fs_info);
|
||||
struct btrfs_block_group *bg;
|
||||
|
||||
@@ -3728,21 +3770,20 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
|
||||
* needing it, as we might not need to COW all nodes/leafs from
|
||||
* the paths we visit in the chunk tree (they were already COWed
|
||||
* or created in the current transaction for example).
|
||||
*
|
||||
* Also, if our caller is allocating a system chunk, do not
|
||||
* attempt to insert the chunk item in the chunk btree, as we
|
||||
* could deadlock on an extent buffer since our caller may be
|
||||
* COWing an extent buffer from the chunk btree.
|
||||
*/
|
||||
bg = btrfs_alloc_chunk(trans, flags);
|
||||
bg = btrfs_create_chunk(trans, flags);
|
||||
if (IS_ERR(bg)) {
|
||||
ret = PTR_ERR(bg);
|
||||
} else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
|
||||
} else {
|
||||
/*
|
||||
* If we fail to add the chunk item here, we end up
|
||||
* trying again at phase 2 of chunk allocation, at
|
||||
* btrfs_create_pending_block_groups(). So ignore
|
||||
* any error here.
|
||||
* any error here. An ENOSPC here could happen, due to
|
||||
* the cases described at do_chunk_alloc() - the system
|
||||
* block group we just created was just turned into RO
|
||||
* mode by a scrub for example, or a running discard
|
||||
* temporarily removed its free space entries, etc.
|
||||
*/
|
||||
btrfs_chunk_alloc_add_chunk_item(trans, bg);
|
||||
}
|
||||
@@ -3751,12 +3792,61 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
|
||||
if (!ret) {
|
||||
ret = btrfs_block_rsv_add(fs_info->chunk_root,
|
||||
&fs_info->chunk_block_rsv,
|
||||
thresh, BTRFS_RESERVE_NO_FLUSH);
|
||||
bytes, BTRFS_RESERVE_NO_FLUSH);
|
||||
if (!ret)
|
||||
trans->chunk_bytes_reserved += thresh;
|
||||
trans->chunk_bytes_reserved += bytes;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Reserve space in the system space for allocating or removing a chunk.
|
||||
* The caller must be holding fs_info->chunk_mutex.
|
||||
*/
|
||||
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	u64 dev_items_bytes;
	u64 chunk_item_bytes;

	/* One device item to update per device used by the profile of @type. */
	dev_items_bytes = btrfs_calc_metadata_size(fs_info,
					get_profile_num_devs(fs_info, type));
	/* Plus a single chunk item to add or remove. */
	chunk_item_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);

	reserve_chunk_space(trans, dev_items_bytes + chunk_item_bytes, type);
}
|
||||
|
||||
/*
|
||||
* Reserve space in the system space, if needed, for doing a modification to the
|
||||
* chunk btree.
|
||||
*
|
||||
* @trans: A transaction handle.
|
||||
* @is_item_insertion: Indicate if the modification is for inserting a new item
|
||||
* in the chunk btree or if it's for the deletion or update
|
||||
* of an existing item.
|
||||
*
|
||||
* This is used in a context where we need to update the chunk btree outside
|
||||
* block group allocation and removal, to avoid a deadlock with a concurrent
|
||||
* task that is allocating a metadata or data block group and therefore needs to
|
||||
* update the chunk btree while holding the chunk mutex. After the update to the
|
||||
* chunk btree is done, btrfs_trans_release_chunk_metadata() should be called.
|
||||
*
|
||||
*/
|
||||
void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
|
||||
bool is_item_insertion)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
u64 bytes;
|
||||
|
||||
if (is_item_insertion)
|
||||
bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
|
||||
else
|
||||
bytes = btrfs_calc_metadata_size(fs_info, 1);
|
||||
|
||||
mutex_lock(&fs_info->chunk_mutex);
|
||||
reserve_chunk_space(trans, bytes, BTRFS_BLOCK_GROUP_SYSTEM);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
}
|
||||
|
||||
void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
|
||||
{
|
||||
struct btrfs_block_group *block_group;
|
||||
@@ -3833,6 +3923,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
|
||||
}
|
||||
spin_unlock(&info->unused_bgs_lock);
|
||||
|
||||
spin_lock(&info->zone_active_bgs_lock);
|
||||
while (!list_empty(&info->zone_active_bgs)) {
|
||||
block_group = list_first_entry(&info->zone_active_bgs,
|
||||
struct btrfs_block_group,
|
||||
active_bg_list);
|
||||
list_del_init(&block_group->active_bg_list);
|
||||
btrfs_put_block_group(block_group);
|
||||
}
|
||||
spin_unlock(&info->zone_active_bgs_lock);
|
||||
|
||||
spin_lock(&info->block_group_cache_lock);
|
||||
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
|
||||
block_group = rb_entry(n, struct btrfs_block_group,
|
||||
|
||||
@@ -98,6 +98,7 @@ struct btrfs_block_group {
|
||||
unsigned int to_copy:1;
|
||||
unsigned int relocating_repair:1;
|
||||
unsigned int chunk_item_inserted:1;
|
||||
unsigned int zone_is_active:1;
|
||||
|
||||
int disk_cache_state;
|
||||
|
||||
@@ -202,7 +203,10 @@ struct btrfs_block_group {
|
||||
*/
|
||||
u64 alloc_offset;
|
||||
u64 zone_unusable;
|
||||
u64 zone_capacity;
|
||||
u64 meta_write_pointer;
|
||||
struct map_lookup *physical_map;
|
||||
struct list_head active_bg_list;
|
||||
};
|
||||
|
||||
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
|
||||
@@ -280,7 +284,7 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
|
||||
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
|
||||
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
|
||||
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, int alloc);
|
||||
u64 bytenr, u64 num_bytes, bool alloc);
|
||||
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
|
||||
u64 ram_bytes, u64 num_bytes, int delalloc);
|
||||
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
|
||||
@@ -289,6 +293,8 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
|
||||
enum btrfs_chunk_alloc_enum force);
|
||||
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
|
||||
void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
|
||||
void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
|
||||
bool is_item_insertion);
|
||||
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
|
||||
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
|
||||
int btrfs_free_block_groups(struct btrfs_fs_info *info);
|
||||
|
||||
@@ -138,17 +138,34 @@ struct btrfs_inode {
|
||||
/* a local copy of root's last_log_commit */
|
||||
int last_log_commit;
|
||||
|
||||
/* total number of bytes pending delalloc, used by stat to calc the
|
||||
* real block usage of the file
|
||||
*/
|
||||
u64 delalloc_bytes;
|
||||
union {
|
||||
/*
|
||||
* Total number of bytes pending delalloc, used by stat to
|
||||
* calculate the real block usage of the file. This is used
|
||||
* only for files.
|
||||
*/
|
||||
u64 delalloc_bytes;
|
||||
/*
|
||||
* The offset of the last dir item key that was logged.
|
||||
* This is used only for directories.
|
||||
*/
|
||||
u64 last_dir_item_offset;
|
||||
};
|
||||
|
||||
/*
|
||||
* Total number of bytes pending delalloc that fall within a file
|
||||
* range that is either a hole or beyond EOF (and no prealloc extent
|
||||
* exists in the range). This is always <= delalloc_bytes.
|
||||
*/
|
||||
u64 new_delalloc_bytes;
|
||||
union {
|
||||
/*
|
||||
* Total number of bytes pending delalloc that fall within a file
|
||||
* range that is either a hole or beyond EOF (and no prealloc extent
|
||||
* exists in the range). This is always <= delalloc_bytes and this
|
||||
* is used only for files.
|
||||
*/
|
||||
u64 new_delalloc_bytes;
|
||||
/*
|
||||
* The offset of the last dir index key that was logged.
|
||||
* This is used only for directories.
|
||||
*/
|
||||
u64 last_dir_index_offset;
|
||||
};
|
||||
|
||||
/*
|
||||
* total number of bytes pending defrag, used by stat to check whether
|
||||
@@ -339,7 +356,12 @@ static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
|
||||
|
||||
struct btrfs_dio_private {
|
||||
struct inode *inode;
|
||||
u64 logical_offset;
|
||||
|
||||
/*
|
||||
* Since DIO can use anonymous page, we cannot use page_offset() to
|
||||
* grab the file offset, thus need a dedicated member for file offset.
|
||||
*/
|
||||
u64 file_offset;
|
||||
u64 disk_bytenr;
|
||||
/* Used for bio::bi_size */
|
||||
u32 bytes;
|
||||
|
||||
@@ -186,7 +186,6 @@ struct btrfsic_dev_state {
|
||||
struct list_head collision_resolving_node; /* list node */
|
||||
struct btrfsic_block dummy_block_for_bio_bh_flush;
|
||||
u64 last_flush_gen;
|
||||
char name[BDEVNAME_SIZE];
|
||||
};
|
||||
|
||||
struct btrfsic_block_hashtable {
|
||||
@@ -403,7 +402,6 @@ static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
|
||||
ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
|
||||
ds->bdev = NULL;
|
||||
ds->state = NULL;
|
||||
ds->name[0] = '\0';
|
||||
INIT_LIST_HEAD(&ds->collision_resolving_node);
|
||||
ds->last_flush_gen = 0;
|
||||
btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
|
||||
@@ -756,10 +754,10 @@ static int btrfsic_process_superblock_dev_mirror(
|
||||
superblock_tmp->mirror_num = 1 + superblock_mirror_num;
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
|
||||
btrfs_info_in_rcu(fs_info,
|
||||
"new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
|
||||
"new initial S-block (bdev %p, %s) @%llu (%pg/%llu/%d)",
|
||||
superblock_bdev,
|
||||
rcu_str_deref(device->name), dev_bytenr,
|
||||
dev_state->name, dev_bytenr,
|
||||
dev_state->bdev, dev_bytenr,
|
||||
superblock_mirror_num);
|
||||
list_add(&superblock_tmp->all_blocks_node,
|
||||
&state->all_blocks_list);
|
||||
@@ -938,9 +936,10 @@ continue_with_current_leaf_stack_frame:
|
||||
if (disk_item_offset + sizeof(struct btrfs_item) >
|
||||
sf->block_ctx->len) {
|
||||
leaf_item_out_of_bounce_error:
|
||||
pr_info("btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
|
||||
pr_info(
|
||||
"btrfsic: leaf item out of bounce at logical %llu, dev %pg\n",
|
||||
sf->block_ctx->start,
|
||||
sf->block_ctx->dev->name);
|
||||
sf->block_ctx->dev->bdev);
|
||||
goto one_stack_frame_backwards;
|
||||
}
|
||||
btrfsic_read_from_block_data(sf->block_ctx,
|
||||
@@ -1058,9 +1057,10 @@ continue_with_current_node_stack_frame:
|
||||
(uintptr_t)nodehdr;
|
||||
if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
|
||||
sf->block_ctx->len) {
|
||||
pr_info("btrfsic: node item out of bounce at logical %llu, dev %s\n",
|
||||
pr_info(
|
||||
"btrfsic: node item out of bounce at logical %llu, dev %pg\n",
|
||||
sf->block_ctx->start,
|
||||
sf->block_ctx->dev->name);
|
||||
sf->block_ctx->dev->bdev);
|
||||
goto one_stack_frame_backwards;
|
||||
}
|
||||
btrfsic_read_from_block_data(
|
||||
@@ -1228,15 +1228,17 @@ static int btrfsic_create_link_to_next_block(
|
||||
if (next_block->logical_bytenr != next_bytenr &&
|
||||
!(!next_block->is_metadata &&
|
||||
0 == next_block->logical_bytenr))
|
||||
pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
|
||||
next_bytenr, next_block_ctx->dev->name,
|
||||
pr_info(
|
||||
"referenced block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
|
||||
next_bytenr, next_block_ctx->dev->bdev,
|
||||
next_block_ctx->dev_bytenr, *mirror_nump,
|
||||
btrfsic_get_block_type(state,
|
||||
next_block),
|
||||
next_block->logical_bytenr);
|
||||
else
|
||||
pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
|
||||
next_bytenr, next_block_ctx->dev->name,
|
||||
pr_info(
|
||||
"referenced block @%llu (%pg/%llu/%d) found in hash table, %c\n",
|
||||
next_bytenr, next_block_ctx->dev->bdev,
|
||||
next_block_ctx->dev_bytenr, *mirror_nump,
|
||||
btrfsic_get_block_type(state,
|
||||
next_block));
|
||||
@@ -1324,8 +1326,8 @@ static int btrfsic_handle_extent_data(
|
||||
if (file_extent_item_offset +
|
||||
offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
|
||||
block_ctx->len) {
|
||||
pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n",
|
||||
block_ctx->start, block_ctx->dev->name);
|
||||
pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
|
||||
block_ctx->start, block_ctx->dev->bdev);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -1344,8 +1346,8 @@ static int btrfsic_handle_extent_data(
|
||||
|
||||
if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
|
||||
block_ctx->len) {
|
||||
pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n",
|
||||
block_ctx->start, block_ctx->dev->name);
|
||||
pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
|
||||
block_ctx->start, block_ctx->dev->bdev);
|
||||
return -1;
|
||||
}
|
||||
btrfsic_read_from_block_data(block_ctx, &file_extent_item,
|
||||
@@ -1421,9 +1423,10 @@ static int btrfsic_handle_extent_data(
|
||||
next_block->logical_bytenr != next_bytenr &&
|
||||
!(!next_block->is_metadata &&
|
||||
0 == next_block->logical_bytenr)) {
|
||||
pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu).\n",
|
||||
pr_info(
|
||||
"referenced block @%llu (%pg/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu)\n",
|
||||
next_bytenr,
|
||||
next_block_ctx.dev->name,
|
||||
next_block_ctx.dev->bdev,
|
||||
next_block_ctx.dev_bytenr,
|
||||
mirror_num,
|
||||
next_block->logical_bytenr);
|
||||
@@ -1455,7 +1458,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
|
||||
struct btrfs_fs_info *fs_info = state->fs_info;
|
||||
int ret;
|
||||
u64 length;
|
||||
struct btrfs_bio *multi = NULL;
|
||||
struct btrfs_io_context *multi = NULL;
|
||||
struct btrfs_device *device;
|
||||
|
||||
length = len;
|
||||
@@ -1561,7 +1564,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
|
||||
struct bio *bio;
|
||||
unsigned int j;
|
||||
|
||||
bio = btrfs_io_bio_alloc(num_pages - i);
|
||||
bio = btrfs_bio_alloc(num_pages - i);
|
||||
bio_set_dev(bio, block_ctx->dev->bdev);
|
||||
bio->bi_iter.bi_sector = dev_bytenr >> 9;
|
||||
bio->bi_opf = REQ_OP_READ;
|
||||
@@ -1577,8 +1580,8 @@ static int btrfsic_read_block(struct btrfsic_state *state,
|
||||
return -1;
|
||||
}
|
||||
if (submit_bio_wait(bio)) {
|
||||
pr_info("btrfsic: read error at logical %llu dev %s!\n",
|
||||
block_ctx->start, block_ctx->dev->name);
|
||||
pr_info("btrfsic: read error at logical %llu dev %pg!\n",
|
||||
block_ctx->start, block_ctx->dev->bdev);
|
||||
bio_put(bio);
|
||||
return -1;
|
||||
}
|
||||
@@ -1602,33 +1605,35 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
|
||||
list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
|
||||
const struct btrfsic_block_link *l;
|
||||
|
||||
pr_info("%c-block @%llu (%s/%llu/%d)\n",
|
||||
pr_info("%c-block @%llu (%pg/%llu/%d)\n",
|
||||
btrfsic_get_block_type(state, b_all),
|
||||
b_all->logical_bytenr, b_all->dev_state->name,
|
||||
b_all->logical_bytenr, b_all->dev_state->bdev,
|
||||
b_all->dev_bytenr, b_all->mirror_num);
|
||||
|
||||
list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
|
||||
pr_info(" %c @%llu (%s/%llu/%d) refers %u* to %c @%llu (%s/%llu/%d)\n",
|
||||
pr_info(
|
||||
" %c @%llu (%pg/%llu/%d) refers %u* to %c @%llu (%pg/%llu/%d)\n",
|
||||
btrfsic_get_block_type(state, b_all),
|
||||
b_all->logical_bytenr, b_all->dev_state->name,
|
||||
b_all->logical_bytenr, b_all->dev_state->bdev,
|
||||
b_all->dev_bytenr, b_all->mirror_num,
|
||||
l->ref_cnt,
|
||||
btrfsic_get_block_type(state, l->block_ref_to),
|
||||
l->block_ref_to->logical_bytenr,
|
||||
l->block_ref_to->dev_state->name,
|
||||
l->block_ref_to->dev_state->bdev,
|
||||
l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->mirror_num);
|
||||
}
|
||||
|
||||
list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
|
||||
pr_info(" %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
|
||||
pr_info(
|
||||
" %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
|
||||
btrfsic_get_block_type(state, b_all),
|
||||
b_all->logical_bytenr, b_all->dev_state->name,
|
||||
b_all->logical_bytenr, b_all->dev_state->bdev,
|
||||
b_all->dev_bytenr, b_all->mirror_num,
|
||||
l->ref_cnt,
|
||||
btrfsic_get_block_type(state, l->block_ref_from),
|
||||
l->block_ref_from->logical_bytenr,
|
||||
l->block_ref_from->dev_state->name,
|
||||
l->block_ref_from->dev_state->bdev,
|
||||
l->block_ref_from->dev_bytenr,
|
||||
l->block_ref_from->mirror_num);
|
||||
}
|
||||
@@ -1743,16 +1748,18 @@ again:
|
||||
if (block->logical_bytenr != bytenr &&
|
||||
!(!block->is_metadata &&
|
||||
block->logical_bytenr == 0))
|
||||
pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
|
||||
bytenr, dev_state->name,
|
||||
pr_info(
|
||||
"written block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
|
||||
bytenr, dev_state->bdev,
|
||||
dev_bytenr,
|
||||
block->mirror_num,
|
||||
btrfsic_get_block_type(state,
|
||||
block),
|
||||
block->logical_bytenr);
|
||||
else
|
||||
pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
|
||||
bytenr, dev_state->name,
|
||||
pr_info(
|
||||
"written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
|
||||
bytenr, dev_state->bdev,
|
||||
dev_bytenr, block->mirror_num,
|
||||
btrfsic_get_block_type(state,
|
||||
block));
|
||||
@@ -1767,8 +1774,9 @@ again:
|
||||
processed_len = state->datablock_size;
|
||||
bytenr = block->logical_bytenr;
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||
pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
|
||||
bytenr, dev_state->name, dev_bytenr,
|
||||
pr_info(
|
||||
"written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
|
||||
bytenr, dev_state->bdev, dev_bytenr,
|
||||
block->mirror_num,
|
||||
btrfsic_get_block_type(state, block));
|
||||
}
|
||||
@@ -1778,9 +1786,10 @@ again:
|
||||
list_empty(&block->ref_to_list) ? ' ' : '!',
|
||||
list_empty(&block->ref_from_list) ? ' ' : '!');
|
||||
if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
|
||||
pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
|
||||
pr_info(
|
||||
"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
|
||||
btrfsic_get_block_type(state, block), bytenr,
|
||||
dev_state->name, dev_bytenr, block->mirror_num,
|
||||
dev_state->bdev, dev_bytenr, block->mirror_num,
|
||||
block->generation,
|
||||
btrfs_disk_key_objectid(&block->disk_key),
|
||||
block->disk_key.type,
|
||||
@@ -1792,9 +1801,10 @@ again:
|
||||
}
|
||||
|
||||
if (!block->is_iodone && !block->never_written) {
|
||||
pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
|
||||
pr_info(
|
||||
"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
|
||||
btrfsic_get_block_type(state, block), bytenr,
|
||||
dev_state->name, dev_bytenr, block->mirror_num,
|
||||
dev_state->bdev, dev_bytenr, block->mirror_num,
|
||||
block->generation,
|
||||
btrfs_stack_header_generation(
|
||||
(struct btrfs_header *)
|
||||
@@ -1921,8 +1931,9 @@ again:
|
||||
if (!is_metadata) {
|
||||
processed_len = state->datablock_size;
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||
pr_info("Written block (%s/%llu/?) !found in hash table, D.\n",
|
||||
dev_state->name, dev_bytenr);
|
||||
pr_info(
|
||||
"written block (%pg/%llu/?) !found in hash table, D\n",
|
||||
dev_state->bdev, dev_bytenr);
|
||||
if (!state->include_extent_data) {
|
||||
/* ignore that written D block */
|
||||
goto continue_loop;
|
||||
@@ -1939,8 +1950,9 @@ again:
|
||||
btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
|
||||
dev_bytenr);
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||
pr_info("Written block @%llu (%s/%llu/?) !found in hash table, M.\n",
|
||||
bytenr, dev_state->name, dev_bytenr);
|
||||
pr_info(
|
||||
"written block @%llu (%pg/%llu/?) !found in hash table, M\n",
|
||||
bytenr, dev_state->bdev, dev_bytenr);
|
||||
}
|
||||
|
||||
block_ctx.dev = dev_state;
|
||||
@@ -1995,9 +2007,9 @@ again:
|
||||
block->next_in_same_bio = NULL;
|
||||
}
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||
pr_info("New written %c-block @%llu (%s/%llu/%d)\n",
|
||||
pr_info("new written %c-block @%llu (%pg/%llu/%d)\n",
|
||||
is_metadata ? 'M' : 'D',
|
||||
block->logical_bytenr, block->dev_state->name,
|
||||
block->logical_bytenr, block->dev_state->bdev,
|
||||
block->dev_bytenr, block->mirror_num);
|
||||
list_add(&block->all_blocks_node, &state->all_blocks_list);
|
||||
btrfsic_block_hashtable_add(block, &state->block_hashtable);
|
||||
@@ -2041,10 +2053,10 @@ static void btrfsic_bio_end_io(struct bio *bp)
|
||||
|
||||
if ((dev_state->state->print_mask &
|
||||
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
|
||||
pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
|
||||
pr_info("bio_end_io(err=%d) for %c @%llu (%pg/%llu/%d)\n",
|
||||
bp->bi_status,
|
||||
btrfsic_get_block_type(dev_state->state, block),
|
||||
block->logical_bytenr, dev_state->name,
|
||||
block->logical_bytenr, dev_state->bdev,
|
||||
block->dev_bytenr, block->mirror_num);
|
||||
next_block = block->next_in_same_bio;
|
||||
block->iodone_w_error = iodone_w_error;
|
||||
@@ -2052,8 +2064,8 @@ static void btrfsic_bio_end_io(struct bio *bp)
|
||||
dev_state->last_flush_gen++;
|
||||
if ((dev_state->state->print_mask &
|
||||
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
|
||||
pr_info("bio_end_io() new %s flush_gen=%llu\n",
|
||||
dev_state->name,
|
||||
pr_info("bio_end_io() new %pg flush_gen=%llu\n",
|
||||
dev_state->bdev,
|
||||
dev_state->last_flush_gen);
|
||||
}
|
||||
if (block->submit_bio_bh_rw & REQ_FUA)
|
||||
@@ -2078,17 +2090,19 @@ static int btrfsic_process_written_superblock(
|
||||
if (!(superblock->generation > state->max_superblock_generation ||
|
||||
0 == state->max_superblock_generation)) {
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
|
||||
pr_info("btrfsic: superblock @%llu (%s/%llu/%d) with old gen %llu <= %llu\n",
|
||||
pr_info(
|
||||
"btrfsic: superblock @%llu (%pg/%llu/%d) with old gen %llu <= %llu\n",
|
||||
superblock->logical_bytenr,
|
||||
superblock->dev_state->name,
|
||||
superblock->dev_state->bdev,
|
||||
superblock->dev_bytenr, superblock->mirror_num,
|
||||
btrfs_super_generation(super_hdr),
|
||||
state->max_superblock_generation);
|
||||
} else {
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
|
||||
pr_info("btrfsic: got new superblock @%llu (%s/%llu/%d) with new gen %llu > %llu\n",
|
||||
pr_info(
|
||||
"btrfsic: got new superblock @%llu (%pg/%llu/%d) with new gen %llu > %llu\n",
|
||||
superblock->logical_bytenr,
|
||||
superblock->dev_state->name,
|
||||
superblock->dev_state->bdev,
|
||||
superblock->dev_bytenr, superblock->mirror_num,
|
||||
btrfs_super_generation(super_hdr),
|
||||
state->max_superblock_generation);
|
||||
@@ -2232,38 +2246,42 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
|
||||
*/
|
||||
list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||
pr_info("rl=%d, %c @%llu (%s/%llu/%d) %u* refers to %c @%llu (%s/%llu/%d)\n",
|
||||
pr_info(
|
||||
"rl=%d, %c @%llu (%pg/%llu/%d) %u* refers to %c @%llu (%pg/%llu/%d)\n",
|
||||
recursion_level,
|
||||
btrfsic_get_block_type(state, block),
|
||||
block->logical_bytenr, block->dev_state->name,
|
||||
block->logical_bytenr, block->dev_state->bdev,
|
||||
block->dev_bytenr, block->mirror_num,
|
||||
l->ref_cnt,
|
||||
btrfsic_get_block_type(state, l->block_ref_to),
|
||||
l->block_ref_to->logical_bytenr,
|
||||
l->block_ref_to->dev_state->name,
|
||||
l->block_ref_to->dev_state->bdev,
|
||||
l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->mirror_num);
|
||||
if (l->block_ref_to->never_written) {
|
||||
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is never written!\n",
|
||||
pr_info(
|
||||
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is never written!\n",
|
||||
btrfsic_get_block_type(state, l->block_ref_to),
|
||||
l->block_ref_to->logical_bytenr,
|
||||
l->block_ref_to->dev_state->name,
|
||||
l->block_ref_to->dev_state->bdev,
|
||||
l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->mirror_num);
|
||||
ret = -1;
|
||||
} else if (!l->block_ref_to->is_iodone) {
|
||||
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not yet iodone!\n",
|
||||
pr_info(
|
||||
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not yet iodone!\n",
|
||||
btrfsic_get_block_type(state, l->block_ref_to),
|
||||
l->block_ref_to->logical_bytenr,
|
||||
l->block_ref_to->dev_state->name,
|
||||
l->block_ref_to->dev_state->bdev,
|
||||
l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->mirror_num);
|
||||
ret = -1;
|
||||
} else if (l->block_ref_to->iodone_w_error) {
|
||||
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which has write error!\n",
|
||||
pr_info(
|
||||
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which has write error!\n",
|
||||
btrfsic_get_block_type(state, l->block_ref_to),
|
||||
l->block_ref_to->logical_bytenr,
|
||||
l->block_ref_to->dev_state->name,
|
||||
l->block_ref_to->dev_state->bdev,
|
||||
l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->mirror_num);
|
||||
ret = -1;
|
||||
@@ -2273,10 +2291,11 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
|
||||
l->parent_generation &&
|
||||
BTRFSIC_GENERATION_UNKNOWN !=
|
||||
l->block_ref_to->generation) {
|
||||
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) with generation %llu != parent generation %llu!\n",
|
||||
pr_info(
|
||||
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) with generation %llu != parent generation %llu!\n",
|
||||
btrfsic_get_block_type(state, l->block_ref_to),
|
||||
l->block_ref_to->logical_bytenr,
|
||||
l->block_ref_to->dev_state->name,
|
||||
l->block_ref_to->dev_state->bdev,
|
||||
l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->mirror_num,
|
||||
l->block_ref_to->generation,
|
||||
@@ -2284,10 +2303,11 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
|
||||
ret = -1;
|
||||
} else if (l->block_ref_to->flush_gen >
|
||||
l->block_ref_to->dev_state->last_flush_gen) {
|
||||
pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
|
||||
pr_info(
|
||||
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
|
||||
btrfsic_get_block_type(state, l->block_ref_to),
|
||||
l->block_ref_to->logical_bytenr,
|
||||
l->block_ref_to->dev_state->name,
|
||||
l->block_ref_to->dev_state->bdev,
|
||||
l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->mirror_num, block->flush_gen,
|
||||
l->block_ref_to->dev_state->last_flush_gen);
|
||||
@@ -2324,15 +2344,16 @@ static int btrfsic_is_block_ref_by_superblock(
|
||||
*/
|
||||
list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||
pr_info("rl=%d, %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
|
||||
pr_info(
|
||||
"rl=%d, %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
|
||||
recursion_level,
|
||||
btrfsic_get_block_type(state, block),
|
||||
block->logical_bytenr, block->dev_state->name,
|
||||
block->logical_bytenr, block->dev_state->bdev,
|
||||
block->dev_bytenr, block->mirror_num,
|
||||
l->ref_cnt,
|
||||
btrfsic_get_block_type(state, l->block_ref_from),
|
||||
l->block_ref_from->logical_bytenr,
|
||||
l->block_ref_from->dev_state->name,
|
||||
l->block_ref_from->dev_state->bdev,
|
||||
l->block_ref_from->dev_bytenr,
|
||||
l->block_ref_from->mirror_num);
|
||||
if (l->block_ref_from->is_superblock &&
|
||||
@@ -2354,30 +2375,30 @@ static int btrfsic_is_block_ref_by_superblock(
|
||||
static void btrfsic_print_add_link(const struct btrfsic_state *state,
|
||||
const struct btrfsic_block_link *l)
|
||||
{
|
||||
pr_info("Add %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
|
||||
pr_info("add %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
|
||||
l->ref_cnt,
|
||||
btrfsic_get_block_type(state, l->block_ref_from),
|
||||
l->block_ref_from->logical_bytenr,
|
||||
l->block_ref_from->dev_state->name,
|
||||
l->block_ref_from->dev_state->bdev,
|
||||
l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
|
||||
btrfsic_get_block_type(state, l->block_ref_to),
|
||||
l->block_ref_to->logical_bytenr,
|
||||
l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->mirror_num);
|
||||
}
|
||||
|
||||
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
|
||||
const struct btrfsic_block_link *l)
|
||||
{
|
||||
pr_info("Rem %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
|
||||
pr_info("rem %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
|
||||
l->ref_cnt,
|
||||
btrfsic_get_block_type(state, l->block_ref_from),
|
||||
l->block_ref_from->logical_bytenr,
|
||||
l->block_ref_from->dev_state->name,
|
||||
l->block_ref_from->dev_state->bdev,
|
||||
l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
|
||||
btrfsic_get_block_type(state, l->block_ref_to),
|
||||
l->block_ref_to->logical_bytenr,
|
||||
l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
|
||||
l->block_ref_to->mirror_num);
|
||||
}
|
||||
|
||||
@@ -2419,9 +2440,9 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
|
||||
* This algorithm is recursive because the amount of used stack space
|
||||
* is very small and the max recursion depth is limited.
|
||||
*/
|
||||
indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)",
|
||||
indent_add = sprintf(buf, "%c-%llu(%pg/%llu/%u)",
|
||||
btrfsic_get_block_type(state, block),
|
||||
block->logical_bytenr, block->dev_state->name,
|
||||
block->logical_bytenr, block->dev_state->bdev,
|
||||
block->dev_bytenr, block->mirror_num);
|
||||
if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
|
||||
printk("[...]\n");
|
||||
@@ -2542,10 +2563,10 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
|
||||
block->never_written = never_written;
|
||||
block->mirror_num = mirror_num;
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||
pr_info("New %s%c-block @%llu (%s/%llu/%d)\n",
|
||||
pr_info("New %s%c-block @%llu (%pg/%llu/%d)\n",
|
||||
additional_string,
|
||||
btrfsic_get_block_type(state, block),
|
||||
block->logical_bytenr, dev_state->name,
|
||||
block->logical_bytenr, dev_state->bdev,
|
||||
block->dev_bytenr, mirror_num);
|
||||
list_add(&block->all_blocks_node, &state->all_blocks_list);
|
||||
btrfsic_block_hashtable_add(block, &state->block_hashtable);
|
||||
@@ -2592,8 +2613,9 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
|
||||
}
|
||||
|
||||
if (WARN_ON(!match)) {
|
||||
pr_info("btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%s, phys_bytenr=%llu)!\n",
|
||||
bytenr, dev_state->name, dev_bytenr);
|
||||
pr_info(
|
||||
"btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%pg, phys_bytenr=%llu)!\n",
|
||||
bytenr, dev_state->bdev, dev_bytenr);
|
||||
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
|
||||
ret = btrfsic_map_block(state, bytenr,
|
||||
state->metablock_size,
|
||||
@@ -2601,8 +2623,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
pr_info("Read logical bytenr @%llu maps to (%s/%llu/%d)\n",
|
||||
bytenr, block_ctx.dev->name,
|
||||
pr_info("read logical bytenr @%llu maps to (%pg/%llu/%d)\n",
|
||||
bytenr, block_ctx.dev->bdev,
|
||||
block_ctx.dev_bytenr, mirror_num);
|
||||
}
|
||||
}
|
||||
@@ -2675,8 +2697,9 @@ static void __btrfsic_submit_bio(struct bio *bio)
|
||||
if ((dev_state->state->print_mask &
|
||||
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
|
||||
BTRFSIC_PRINT_MASK_VERBOSE)))
|
||||
pr_info("btrfsic_submit_bio(%s) with FLUSH but dummy block already in use (ignored)!\n",
|
||||
dev_state->name);
|
||||
pr_info(
|
||||
"btrfsic_submit_bio(%pg) with FLUSH but dummy block already in use (ignored)!\n",
|
||||
dev_state->bdev);
|
||||
} else {
|
||||
struct btrfsic_block *const block =
|
||||
&dev_state->dummy_block_for_bio_bh_flush;
|
||||
@@ -2751,7 +2774,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
|
||||
|
||||
list_for_each_entry(device, dev_head, dev_list) {
|
||||
struct btrfsic_dev_state *ds;
|
||||
const char *p;
|
||||
|
||||
if (!device->bdev || !device->name)
|
||||
continue;
|
||||
@@ -2763,10 +2785,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
ds->bdev = device->bdev;
|
||||
ds->state = state;
|
||||
bdevname(ds->bdev, ds->name);
|
||||
ds->name[BDEVNAME_SIZE - 1] = '\0';
|
||||
p = kbasename(ds->name);
|
||||
strlcpy(ds->name, p, sizeof(ds->name));
|
||||
btrfsic_dev_state_hashtable_add(ds,
|
||||
&btrfsic_dev_state_hashtable);
|
||||
}
|
||||
@@ -2844,9 +2862,10 @@ void btrfsic_unmount(struct btrfs_fs_devices *fs_devices)
|
||||
if (b_all->is_iodone || b_all->never_written)
|
||||
btrfsic_block_free(b_all);
|
||||
else
|
||||
pr_info("btrfs: attempt to free %c-block @%llu (%s/%llu/%d) on umount which is not yet iodone!\n",
|
||||
pr_info(
|
||||
"btrfs: attempt to free %c-block @%llu (%pg/%llu/%d) on umount which is not yet iodone!\n",
|
||||
btrfsic_get_block_type(state, b_all),
|
||||
b_all->logical_bytenr, b_all->dev_state->name,
|
||||
b_all->logical_bytenr, b_all->dev_state->bdev,
|
||||
b_all->dev_bytenr, b_all->mirror_num);
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "compression.h"
|
||||
#include "extent_io.h"
|
||||
#include "extent_map.h"
|
||||
#include "subpage.h"
|
||||
#include "zoned.h"
|
||||
|
||||
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
|
||||
@@ -181,9 +182,9 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
|
||||
if (memcmp(&csum, cb_sum, csum_size) != 0) {
|
||||
btrfs_print_data_csum_error(inode, disk_start,
|
||||
csum, cb_sum, cb->mirror_num);
|
||||
if (btrfs_io_bio(bio)->device)
|
||||
if (btrfs_bio(bio)->device)
|
||||
btrfs_dev_stat_inc_and_print(
|
||||
btrfs_io_bio(bio)->device,
|
||||
btrfs_bio(bio)->device,
|
||||
BTRFS_DEV_STAT_CORRUPTION_ERRS);
|
||||
return -EIO;
|
||||
}
|
||||
@@ -194,6 +195,87 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reduce bio and io accounting for a compressed_bio with its corresponding bio.
|
||||
*
|
||||
* Return true if there is no pending bio nor io.
|
||||
* Return false otherwise.
|
||||
*/
|
||||
static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||
unsigned int bi_size = 0;
|
||||
bool last_io = false;
|
||||
struct bio_vec *bvec;
|
||||
struct bvec_iter_all iter_all;
|
||||
|
||||
/*
|
||||
* At endio time, bi_iter.bi_size doesn't represent the real bio size.
|
||||
* Thus here we have to iterate through all segments to grab correct
|
||||
* bio size.
|
||||
*/
|
||||
bio_for_each_segment_all(bvec, bio, iter_all)
|
||||
bi_size += bvec->bv_len;
|
||||
|
||||
if (bio->bi_status)
|
||||
cb->errors = 1;
|
||||
|
||||
ASSERT(bi_size && bi_size <= cb->compressed_len);
|
||||
last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
|
||||
&cb->pending_sectors);
|
||||
/*
|
||||
* Here we must wake up the possible error handler after all other
|
||||
* operations on @cb finished, or we can race with
|
||||
* finish_compressed_bio_*() which may free @cb.
|
||||
*/
|
||||
wake_up_var(cb);
|
||||
|
||||
return last_io;
|
||||
}
|
||||
|
||||
static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
|
||||
{
|
||||
unsigned int index;
|
||||
struct page *page;
|
||||
|
||||
/* Release the compressed pages */
|
||||
for (index = 0; index < cb->nr_pages; index++) {
|
||||
page = cb->compressed_pages[index];
|
||||
page->mapping = NULL;
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
/* Do io completion on the original bio */
|
||||
if (cb->errors) {
|
||||
bio_io_error(cb->orig_bio);
|
||||
} else {
|
||||
struct bio_vec *bvec;
|
||||
struct bvec_iter_all iter_all;
|
||||
|
||||
ASSERT(bio);
|
||||
ASSERT(!bio->bi_status);
|
||||
/*
|
||||
* We have verified the checksum already, set page checked so
|
||||
* the end_io handlers know about it
|
||||
*/
|
||||
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
||||
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
|
||||
u64 bvec_start = page_offset(bvec->bv_page) +
|
||||
bvec->bv_offset;
|
||||
|
||||
btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
|
||||
bvec->bv_page, bvec_start,
|
||||
bvec->bv_len);
|
||||
}
|
||||
|
||||
bio_endio(cb->orig_bio);
|
||||
}
|
||||
|
||||
/* Finally free the cb struct */
|
||||
kfree(cb->compressed_pages);
|
||||
kfree(cb);
|
||||
}
|
||||
|
||||
/* when we finish reading compressed pages from the disk, we
|
||||
* decompress them and then run the bio end_io routines on the
|
||||
* decompressed pages (in the inode address space).
|
||||
@@ -208,25 +290,17 @@ static void end_compressed_bio_read(struct bio *bio)
|
||||
{
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
struct inode *inode;
|
||||
struct page *page;
|
||||
unsigned int index;
|
||||
unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
|
||||
unsigned int mirror = btrfs_bio(bio)->mirror_num;
|
||||
int ret = 0;
|
||||
|
||||
if (bio->bi_status)
|
||||
cb->errors = 1;
|
||||
|
||||
/* if there are more bios still pending for this compressed
|
||||
* extent, just exit
|
||||
*/
|
||||
if (!refcount_dec_and_test(&cb->pending_bios))
|
||||
if (!dec_and_test_compressed_bio(cb, bio))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Record the correct mirror_num in cb->orig_bio so that
|
||||
* read-repair can work properly.
|
||||
*/
|
||||
btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
|
||||
btrfs_bio(cb->orig_bio)->mirror_num = mirror;
|
||||
cb->mirror_num = mirror;
|
||||
|
||||
/*
|
||||
@@ -250,36 +324,7 @@ static void end_compressed_bio_read(struct bio *bio)
|
||||
csum_failed:
|
||||
if (ret)
|
||||
cb->errors = 1;
|
||||
|
||||
/* release the compressed pages */
|
||||
index = 0;
|
||||
for (index = 0; index < cb->nr_pages; index++) {
|
||||
page = cb->compressed_pages[index];
|
||||
page->mapping = NULL;
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
/* do io completion on the original bio */
|
||||
if (cb->errors) {
|
||||
bio_io_error(cb->orig_bio);
|
||||
} else {
|
||||
struct bio_vec *bvec;
|
||||
struct bvec_iter_all iter_all;
|
||||
|
||||
/*
|
||||
* we have verified the checksum already, set page
|
||||
* checked so the end_io handlers know about it
|
||||
*/
|
||||
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
||||
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all)
|
||||
SetPageChecked(bvec->bv_page);
|
||||
|
||||
bio_endio(cb->orig_bio);
|
||||
}
|
||||
|
||||
/* finally free the cb struct */
|
||||
kfree(cb->compressed_pages);
|
||||
kfree(cb);
|
||||
finish_compressed_bio_read(cb, bio);
|
||||
out:
|
||||
bio_put(bio);
|
||||
}
|
||||
@@ -291,6 +336,7 @@ out:
|
||||
static noinline void end_compressed_writeback(struct inode *inode,
|
||||
const struct compressed_bio *cb)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
unsigned long index = cb->start >> PAGE_SHIFT;
|
||||
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
|
||||
struct page *pages[16];
|
||||
@@ -313,7 +359,8 @@ static noinline void end_compressed_writeback(struct inode *inode,
|
||||
for (i = 0; i < ret; i++) {
|
||||
if (cb->errors)
|
||||
SetPageError(pages[i]);
|
||||
end_page_writeback(pages[i]);
|
||||
btrfs_page_clamp_clear_writeback(fs_info, pages[i],
|
||||
cb->start, cb->len);
|
||||
put_page(pages[i]);
|
||||
}
|
||||
nr_pages -= ret;
|
||||
@@ -322,60 +369,127 @@ static noinline void end_compressed_writeback(struct inode *inode,
|
||||
/* the inode may be gone now */
|
||||
}
|
||||
|
||||
/*
|
||||
* do the cleanup once all the compressed pages hit the disk.
|
||||
* This will clear writeback on the file pages and free the compressed
|
||||
* pages.
|
||||
*
|
||||
* This also calls the writeback end hooks for the file pages so that
|
||||
* metadata and checksums can be updated in the file.
|
||||
*/
|
||||
static void end_compressed_bio_write(struct bio *bio)
|
||||
static void finish_compressed_bio_write(struct compressed_bio *cb)
|
||||
{
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
struct inode *inode;
|
||||
struct page *page;
|
||||
struct inode *inode = cb->inode;
|
||||
unsigned int index;
|
||||
|
||||
if (bio->bi_status)
|
||||
cb->errors = 1;
|
||||
|
||||
/* if there are more bios still pending for this compressed
|
||||
* extent, just exit
|
||||
/*
|
||||
* Ok, we're the last bio for this extent, step one is to call back
|
||||
* into the FS and do all the end_io operations.
|
||||
*/
|
||||
if (!refcount_dec_and_test(&cb->pending_bios))
|
||||
goto out;
|
||||
|
||||
/* ok, we're the last bio for this extent, step one is to
|
||||
* call back into the FS and do all the end_io operations
|
||||
*/
|
||||
inode = cb->inode;
|
||||
btrfs_record_physical_zoned(inode, cb->start, bio);
|
||||
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
|
||||
cb->start, cb->start + cb->len - 1,
|
||||
!cb->errors);
|
||||
|
||||
end_compressed_writeback(inode, cb);
|
||||
/* note, our inode could be gone now */
|
||||
/* Note, our inode could be gone now */
|
||||
|
||||
/*
|
||||
* release the compressed pages, these came from alloc_page and
|
||||
* Release the compressed pages, these came from alloc_page and
|
||||
* are not attached to the inode at all
|
||||
*/
|
||||
index = 0;
|
||||
for (index = 0; index < cb->nr_pages; index++) {
|
||||
page = cb->compressed_pages[index];
|
||||
struct page *page = cb->compressed_pages[index];
|
||||
|
||||
page->mapping = NULL;
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
/* finally free the cb struct */
|
||||
/* Finally free the cb struct */
|
||||
kfree(cb->compressed_pages);
|
||||
kfree(cb);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do the cleanup once all the compressed pages hit the disk. This will clear
|
||||
* writeback on the file pages and free the compressed pages.
|
||||
*
|
||||
* This also calls the writeback end hooks for the file pages so that metadata
|
||||
* and checksums can be updated in the file.
|
||||
*/
|
||||
static void end_compressed_bio_write(struct bio *bio)
|
||||
{
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
|
||||
if (!dec_and_test_compressed_bio(cb, bio))
|
||||
goto out;
|
||||
|
||||
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
|
||||
|
||||
finish_compressed_bio_write(cb);
|
||||
out:
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
|
||||
struct compressed_bio *cb,
|
||||
struct bio *bio, int mirror_num)
|
||||
{
|
||||
blk_status_t ret;
|
||||
|
||||
ASSERT(bio->bi_iter.bi_size);
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a bio for a compressed_bio; the bio will be used to read/write on-disk
|
||||
* (aka, compressed) data.
|
||||
*
|
||||
* @cb: The compressed_bio structure, which records all the needed
|
||||
* information to bind the compressed data to the uncompressed
|
||||
* page cache.
|
||||
* @disk_bytenr: The logical bytenr where the compressed data will be read
|
||||
* from or written to.
|
||||
* @endio_func: The endio function to call after the IO for compressed data
|
||||
* is finished.
|
||||
* @next_stripe_start: Return value of logical bytenr of where next stripe starts.
|
||||
* Let the caller know to only fill the bio up to the stripe
|
||||
* boundary.
|
||||
*/
|
||||
|
||||
|
||||
static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
|
||||
unsigned int opf, bio_end_io_t endio_func,
|
||||
u64 *next_stripe_start)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||
struct btrfs_io_geometry geom;
|
||||
struct extent_map *em;
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS);
|
||||
|
||||
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
|
||||
bio->bi_opf = opf;
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = endio_func;
|
||||
|
||||
em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
bio_put(bio);
|
||||
return ERR_CAST(em);
|
||||
}
|
||||
|
||||
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
|
||||
bio_set_dev(bio, em->map_lookup->stripes[0].dev->bdev);
|
||||
|
||||
ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), disk_bytenr, &geom);
|
||||
free_extent_map(em);
|
||||
if (ret < 0) {
|
||||
bio_put(bio);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
*next_stripe_start = disk_bytenr + geom.len;
|
||||
|
||||
return bio;
|
||||
}
|
||||
|
||||
/*
|
||||
* worker function to build and submit bios for previously compressed pages.
|
||||
* The corresponding pages in the inode should be marked for writeback
|
||||
@@ -396,20 +510,19 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct bio *bio = NULL;
|
||||
struct compressed_bio *cb;
|
||||
unsigned long bytes_left;
|
||||
int pg_index = 0;
|
||||
struct page *page;
|
||||
u64 first_byte = disk_start;
|
||||
u64 cur_disk_bytenr = disk_start;
|
||||
u64 next_stripe_start;
|
||||
blk_status_t ret;
|
||||
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
|
||||
const bool use_append = btrfs_use_zone_append(inode, disk_start);
|
||||
const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
|
||||
|
||||
WARN_ON(!PAGE_ALIGNED(start));
|
||||
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
|
||||
IS_ALIGNED(len, fs_info->sectorsize));
|
||||
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
|
||||
if (!cb)
|
||||
return BLK_STS_RESOURCE;
|
||||
refcount_set(&cb->pending_bios, 0);
|
||||
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
|
||||
cb->errors = 0;
|
||||
cb->inode = &inode->vfs_inode;
|
||||
cb->start = start;
|
||||
@@ -420,118 +533,100 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
cb->orig_bio = NULL;
|
||||
cb->nr_pages = nr_pages;
|
||||
|
||||
bio = btrfs_bio_alloc(first_byte);
|
||||
bio->bi_opf = bio_op | write_flags;
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = end_compressed_bio_write;
|
||||
while (cur_disk_bytenr < disk_start + compressed_len) {
|
||||
u64 offset = cur_disk_bytenr - disk_start;
|
||||
unsigned int index = offset >> PAGE_SHIFT;
|
||||
unsigned int real_size;
|
||||
unsigned int added;
|
||||
struct page *page = compressed_pages[index];
|
||||
bool submit = false;
|
||||
|
||||
if (use_append) {
|
||||
struct btrfs_device *device;
|
||||
|
||||
device = btrfs_zoned_get_device(fs_info, disk_start, PAGE_SIZE);
|
||||
if (IS_ERR(device)) {
|
||||
kfree(cb);
|
||||
bio_put(bio);
|
||||
return BLK_STS_NOTSUPP;
|
||||
/* Allocate new bio if submitted or not yet allocated */
|
||||
if (!bio) {
|
||||
bio = alloc_compressed_bio(cb, cur_disk_bytenr,
|
||||
bio_op | write_flags, end_compressed_bio_write,
|
||||
&next_stripe_start);
|
||||
if (IS_ERR(bio)) {
|
||||
ret = errno_to_blk_status(PTR_ERR(bio));
|
||||
bio = NULL;
|
||||
goto finish_cb;
|
||||
}
|
||||
}
|
||||
|
||||
bio_set_dev(bio, device->bdev);
|
||||
}
|
||||
|
||||
if (blkcg_css) {
|
||||
bio->bi_opf |= REQ_CGROUP_PUNT;
|
||||
kthread_associate_blkcg(blkcg_css);
|
||||
}
|
||||
refcount_set(&cb->pending_bios, 1);
|
||||
|
||||
/* create and submit bios for the compressed pages */
|
||||
bytes_left = compressed_len;
|
||||
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
|
||||
int submit = 0;
|
||||
int len = 0;
|
||||
|
||||
page = compressed_pages[pg_index];
|
||||
page->mapping = inode->vfs_inode.i_mapping;
|
||||
if (bio->bi_iter.bi_size)
|
||||
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
|
||||
0);
|
||||
/*
|
||||
* We should never reach next_stripe_start, as we will
|
||||
* submit the bio immediately when we reach the boundary.
|
||||
*/
|
||||
ASSERT(cur_disk_bytenr != next_stripe_start);
|
||||
|
||||
/*
|
||||
* Page can only be added to bio if the current bio fits in
|
||||
* stripe.
|
||||
* We have various limits on the real write size:
|
||||
* - stripe boundary
|
||||
* - page boundary
|
||||
* - compressed length boundary
|
||||
*/
|
||||
if (!submit) {
|
||||
if (pg_index == 0 && use_append)
|
||||
len = bio_add_zone_append_page(bio, page,
|
||||
PAGE_SIZE, 0);
|
||||
else
|
||||
len = bio_add_page(bio, page, PAGE_SIZE, 0);
|
||||
}
|
||||
real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_bytenr);
|
||||
real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
|
||||
real_size = min_t(u64, real_size, compressed_len - offset);
|
||||
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
|
||||
|
||||
page->mapping = NULL;
|
||||
if (submit || len < PAGE_SIZE) {
|
||||
/*
|
||||
* inc the count before we submit the bio so
|
||||
* we know the end IO handler won't happen before
|
||||
* we inc the count. Otherwise, the cb might get
|
||||
* freed before we're done setting it up
|
||||
*/
|
||||
refcount_inc(&cb->pending_bios);
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio,
|
||||
BTRFS_WQ_ENDIO_DATA);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
if (use_append)
|
||||
added = bio_add_zone_append_page(bio, page, real_size,
|
||||
offset_in_page(offset));
|
||||
else
|
||||
added = bio_add_page(bio, page, real_size,
|
||||
offset_in_page(offset));
|
||||
/* Reached zoned boundary */
|
||||
if (added == 0)
|
||||
submit = true;
|
||||
|
||||
cur_disk_bytenr += added;
|
||||
/* Reached stripe boundary */
|
||||
if (cur_disk_bytenr == next_stripe_start)
|
||||
submit = true;
|
||||
|
||||
/* Finished the range */
|
||||
if (cur_disk_bytenr == disk_start + compressed_len)
|
||||
submit = true;
|
||||
|
||||
if (submit) {
|
||||
if (!skip_sum) {
|
||||
ret = btrfs_csum_one_bio(inode, bio, start, 1);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
}
|
||||
|
||||
ret = btrfs_map_bio(fs_info, bio, 0);
|
||||
if (ret) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
bio = btrfs_bio_alloc(first_byte);
|
||||
bio->bi_opf = bio_op | write_flags;
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = end_compressed_bio_write;
|
||||
if (blkcg_css)
|
||||
bio->bi_opf |= REQ_CGROUP_PUNT;
|
||||
/*
|
||||
* Use bio_add_page() to ensure the bio has at least one
|
||||
* page.
|
||||
*/
|
||||
bio_add_page(bio, page, PAGE_SIZE, 0);
|
||||
ret = submit_compressed_bio(fs_info, cb, bio, 0);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
bio = NULL;
|
||||
}
|
||||
if (bytes_left < PAGE_SIZE) {
|
||||
btrfs_info(fs_info,
|
||||
"bytes left %lu compress len %u nr %u",
|
||||
bytes_left, cb->compressed_len, cb->nr_pages);
|
||||
}
|
||||
bytes_left -= PAGE_SIZE;
|
||||
first_byte += PAGE_SIZE;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
|
||||
if (!skip_sum) {
|
||||
ret = btrfs_csum_one_bio(inode, bio, start, 1);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
}
|
||||
|
||||
ret = btrfs_map_bio(fs_info, bio, 0);
|
||||
if (ret) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
if (blkcg_css)
|
||||
kthread_associate_blkcg(NULL);
|
||||
|
||||
return 0;
|
||||
|
||||
finish_cb:
|
||||
if (bio) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
}
|
||||
/* Last byte of @cb is submitted, endio will free @cb */
|
||||
if (cur_disk_bytenr == disk_start + compressed_len)
|
||||
return ret;
|
||||
|
||||
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
|
||||
(disk_start + compressed_len - cur_disk_bytenr) >>
|
||||
fs_info->sectorsize_bits);
|
||||
/*
|
||||
* Even with previous bio ended, we should still have io not yet
|
||||
* submitted, thus need to finish manually.
|
||||
*/
|
||||
ASSERT(refcount_read(&cb->pending_sectors));
|
||||
/* Now we are the only one referring to @cb, can finish it safely. */
|
||||
finish_compressed_bio_write(cb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static u64 bio_end_offset(struct bio *bio)
|
||||
@@ -541,25 +636,33 @@ static u64 bio_end_offset(struct bio *bio)
|
||||
return page_offset(last->bv_page) + last->bv_len + last->bv_offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add extra pages in the same compressed file extent so that we don't need to
|
||||
* re-read the same extent again and again.
|
||||
*
|
||||
* NOTE: this won't work well for subpage, as for subpage read, we lock the
|
||||
* full page then submit a bio for each compressed/regular extent.
|
||||
*
|
||||
* This means, if we have several sectors in the same page pointing to the same
|
||||
* on-disk compressed data, we will re-read the same extent many times and
|
||||
* this function can only help for the next page.
|
||||
*/
|
||||
static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
u64 compressed_end,
|
||||
struct compressed_bio *cb)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
unsigned long end_index;
|
||||
unsigned long pg_index;
|
||||
u64 last_offset;
|
||||
u64 cur = bio_end_offset(cb->orig_bio);
|
||||
u64 isize = i_size_read(inode);
|
||||
int ret;
|
||||
struct page *page;
|
||||
unsigned long nr_pages = 0;
|
||||
struct extent_map *em;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct extent_map_tree *em_tree;
|
||||
struct extent_io_tree *tree;
|
||||
u64 end;
|
||||
int misses = 0;
|
||||
int sectors_missed = 0;
|
||||
|
||||
last_offset = bio_end_offset(cb->orig_bio);
|
||||
em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
tree = &BTRFS_I(inode)->io_tree;
|
||||
|
||||
@@ -578,18 +681,29 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
|
||||
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
|
||||
|
||||
while (last_offset < compressed_end) {
|
||||
pg_index = last_offset >> PAGE_SHIFT;
|
||||
while (cur < compressed_end) {
|
||||
u64 page_end;
|
||||
u64 pg_index = cur >> PAGE_SHIFT;
|
||||
u32 add_size;
|
||||
|
||||
if (pg_index > end_index)
|
||||
break;
|
||||
|
||||
page = xa_load(&mapping->i_pages, pg_index);
|
||||
if (page && !xa_is_value(page)) {
|
||||
misses++;
|
||||
if (misses > 4)
|
||||
sectors_missed += (PAGE_SIZE - offset_in_page(cur)) >>
|
||||
fs_info->sectorsize_bits;
|
||||
|
||||
/* Beyond threshold, no need to continue */
|
||||
if (sectors_missed > 4)
|
||||
break;
|
||||
goto next;
|
||||
|
||||
/*
|
||||
* Jump to next page start as we already have page for
|
||||
* current offset.
|
||||
*/
|
||||
cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
page = __page_cache_alloc(mapping_gfp_constraint(mapping,
|
||||
@@ -599,14 +713,11 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
|
||||
if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
|
||||
put_page(page);
|
||||
goto next;
|
||||
/* There is already a page, skip to page end */
|
||||
cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* at this point, we have a locked page in the page cache
|
||||
* for these bytes in the file. But, we have to make
|
||||
* sure they map to this compressed extent on disk.
|
||||
*/
|
||||
ret = set_page_extent_mapped(page);
|
||||
if (ret < 0) {
|
||||
unlock_page(page);
|
||||
@@ -614,18 +725,22 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
break;
|
||||
}
|
||||
|
||||
end = last_offset + PAGE_SIZE - 1;
|
||||
lock_extent(tree, last_offset, end);
|
||||
page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1;
|
||||
lock_extent(tree, cur, page_end);
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, last_offset,
|
||||
PAGE_SIZE);
|
||||
em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
|
||||
read_unlock(&em_tree->lock);
|
||||
|
||||
if (!em || last_offset < em->start ||
|
||||
(last_offset + PAGE_SIZE > extent_map_end(em)) ||
|
||||
/*
|
||||
* At this point, we have a locked page in the page cache for
|
||||
* these bytes in the file. But, we have to make sure they map
|
||||
* to this compressed extent on disk.
|
||||
*/
|
||||
if (!em || cur < em->start ||
|
||||
(cur + fs_info->sectorsize > extent_map_end(em)) ||
|
||||
(em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
|
||||
free_extent_map(em);
|
||||
unlock_extent(tree, last_offset, end);
|
||||
unlock_extent(tree, cur, page_end);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
break;
|
||||
@@ -643,20 +758,23 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
}
|
||||
}
|
||||
|
||||
ret = bio_add_page(cb->orig_bio, page,
|
||||
PAGE_SIZE, 0);
|
||||
|
||||
if (ret == PAGE_SIZE) {
|
||||
nr_pages++;
|
||||
put_page(page);
|
||||
} else {
|
||||
unlock_extent(tree, last_offset, end);
|
||||
add_size = min(em->start + em->len, page_end + 1) - cur;
|
||||
ret = bio_add_page(cb->orig_bio, page, add_size, offset_in_page(cur));
|
||||
if (ret != add_size) {
|
||||
unlock_extent(tree, cur, page_end);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
break;
|
||||
}
|
||||
next:
|
||||
last_offset += PAGE_SIZE;
|
||||
/*
|
||||
* If it's subpage, we also need to increase its
|
||||
* subpage::readers number, as at endio we will decrease
|
||||
* subpage::readers and unlock the page.
|
||||
*/
|
||||
if (fs_info->sectorsize < PAGE_SIZE)
|
||||
btrfs_subpage_start_reader(fs_info, page, cur, add_size);
|
||||
put_page(page);
|
||||
cur += add_size;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -681,9 +799,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
unsigned int compressed_len;
|
||||
unsigned int nr_pages;
|
||||
unsigned int pg_index;
|
||||
struct page *page;
|
||||
struct bio *comp_bio;
|
||||
u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
|
||||
struct bio *comp_bio = NULL;
|
||||
const u64 disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
|
||||
u64 cur_disk_byte = disk_bytenr;
|
||||
u64 next_stripe_start;
|
||||
u64 file_offset;
|
||||
u64 em_len;
|
||||
u64 em_start;
|
||||
@@ -710,7 +829,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
if (!cb)
|
||||
goto out;
|
||||
|
||||
refcount_set(&cb->pending_bios, 0);
|
||||
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
|
||||
cb->errors = 0;
|
||||
cb->inode = inode;
|
||||
cb->mirror_num = mirror_num;
|
||||
@@ -750,86 +869,74 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
/* include any pages we added in add_ra_bio_pages */
|
||||
cb->len = bio->bi_iter.bi_size;
|
||||
|
||||
comp_bio = btrfs_bio_alloc(cur_disk_byte);
|
||||
comp_bio->bi_opf = REQ_OP_READ;
|
||||
comp_bio->bi_private = cb;
|
||||
comp_bio->bi_end_io = end_compressed_bio_read;
|
||||
refcount_set(&cb->pending_bios, 1);
|
||||
|
||||
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
|
||||
u32 pg_len = PAGE_SIZE;
|
||||
int submit = 0;
|
||||
while (cur_disk_byte < disk_bytenr + compressed_len) {
|
||||
u64 offset = cur_disk_byte - disk_bytenr;
|
||||
unsigned int index = offset >> PAGE_SHIFT;
|
||||
unsigned int real_size;
|
||||
unsigned int added;
|
||||
struct page *page = cb->compressed_pages[index];
|
||||
bool submit = false;
|
||||
|
||||
/* Allocate new bio if submitted or not yet allocated */
|
||||
if (!comp_bio) {
|
||||
comp_bio = alloc_compressed_bio(cb, cur_disk_byte,
|
||||
REQ_OP_READ, end_compressed_bio_read,
|
||||
&next_stripe_start);
|
||||
if (IS_ERR(comp_bio)) {
|
||||
ret = errno_to_blk_status(PTR_ERR(comp_bio));
|
||||
comp_bio = NULL;
|
||||
goto finish_cb;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* To handle subpage case, we need to make sure the bio only
|
||||
* covers the range we need.
|
||||
*
|
||||
* If we're at the last page, truncate the length to only cover
|
||||
* the remaining part.
|
||||
* We should never reach next_stripe_start, as we will
|
||||
* submit comp_bio immediately when we reach the boundary.
|
||||
*/
|
||||
if (pg_index == nr_pages - 1)
|
||||
pg_len = min_t(u32, PAGE_SIZE,
|
||||
compressed_len - pg_index * PAGE_SIZE);
|
||||
ASSERT(cur_disk_byte != next_stripe_start);
|
||||
/*
|
||||
* We have various limits on the real read size:
|
||||
* - stripe boundary
|
||||
* - page boundary
|
||||
* - compressed length boundary
|
||||
*/
|
||||
real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_byte);
|
||||
real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
|
||||
real_size = min_t(u64, real_size, compressed_len - offset);
|
||||
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
|
||||
|
||||
page = cb->compressed_pages[pg_index];
|
||||
page->mapping = inode->i_mapping;
|
||||
page->index = em_start >> PAGE_SHIFT;
|
||||
added = bio_add_page(comp_bio, page, real_size, offset_in_page(offset));
|
||||
/*
|
||||
* Maximum compressed extent is smaller than bio size limit,
|
||||
* thus bio_add_page() should always succeed.
|
||||
*/
|
||||
ASSERT(added == real_size);
|
||||
cur_disk_byte += added;
|
||||
|
||||
if (comp_bio->bi_iter.bi_size)
|
||||
submit = btrfs_bio_fits_in_stripe(page, pg_len,
|
||||
comp_bio, 0);
|
||||
/* Reached stripe boundary, need to submit */
|
||||
if (cur_disk_byte == next_stripe_start)
|
||||
submit = true;
|
||||
|
||||
page->mapping = NULL;
|
||||
if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) {
|
||||
/* Has finished the range, need to submit */
|
||||
if (cur_disk_byte == disk_bytenr + compressed_len)
|
||||
submit = true;
|
||||
|
||||
if (submit) {
|
||||
unsigned int nr_sectors;
|
||||
|
||||
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
|
||||
BTRFS_WQ_ENDIO_DATA);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
|
||||
/*
|
||||
* inc the count before we submit the bio so
|
||||
* we know the end IO handler won't happen before
|
||||
* we inc the count. Otherwise, the cb might get
|
||||
* freed before we're done setting it up
|
||||
*/
|
||||
refcount_inc(&cb->pending_bios);
|
||||
|
||||
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
|
||||
nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
|
||||
fs_info->sectorsize);
|
||||
sums += fs_info->csum_size * nr_sectors;
|
||||
|
||||
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
|
||||
if (ret) {
|
||||
comp_bio->bi_status = ret;
|
||||
bio_endio(comp_bio);
|
||||
}
|
||||
|
||||
comp_bio = btrfs_bio_alloc(cur_disk_byte);
|
||||
comp_bio->bi_opf = REQ_OP_READ;
|
||||
comp_bio->bi_private = cb;
|
||||
comp_bio->bi_end_io = end_compressed_bio_read;
|
||||
|
||||
bio_add_page(comp_bio, page, pg_len, 0);
|
||||
ret = submit_compressed_bio(fs_info, cb, comp_bio, mirror_num);
|
||||
if (ret)
|
||||
goto finish_cb;
|
||||
comp_bio = NULL;
|
||||
}
|
||||
cur_disk_byte += pg_len;
|
||||
}
|
||||
|
||||
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
|
||||
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
|
||||
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
|
||||
if (ret) {
|
||||
comp_bio->bi_status = ret;
|
||||
bio_endio(comp_bio);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
fail2:
|
||||
@@ -844,6 +951,26 @@ fail1:
|
||||
out:
|
||||
free_extent_map(em);
|
||||
return ret;
|
||||
finish_cb:
|
||||
if (comp_bio) {
|
||||
comp_bio->bi_status = ret;
|
||||
bio_endio(comp_bio);
|
||||
}
|
||||
/* All bytes of @cb are submitted, endio will free @cb */
|
||||
if (cur_disk_byte == disk_bytenr + compressed_len)
|
||||
return ret;
|
||||
|
||||
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
|
||||
(disk_bytenr + compressed_len - cur_disk_byte) >>
|
||||
fs_info->sectorsize_bits);
|
||||
/*
|
||||
* Even with previous bio ended, we should still have io not yet
|
||||
* submitted, thus need to finish @cb manually.
|
||||
*/
|
||||
ASSERT(refcount_read(&cb->pending_sectors));
|
||||
/* Now we are the only one referring to @cb, can finish it safely. */
|
||||
finish_compressed_bio_read(cb, NULL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -28,8 +28,8 @@ struct btrfs_inode;
|
||||
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
|
||||
|
||||
struct compressed_bio {
|
||||
/* number of bios pending for this compressed extent */
|
||||
refcount_t pending_bios;
|
||||
/* Number of sectors with unfinished IO (unsubmitted or unfinished) */
|
||||
refcount_t pending_sectors;
|
||||
|
||||
/* Number of compressed pages in the array */
|
||||
unsigned int nr_pages;
|
||||
|
||||
156
fs/btrfs/ctree.c
156
fs/btrfs/ctree.c
@@ -396,7 +396,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
|
||||
if (*cow_ret == buf)
|
||||
unlock_orig = 1;
|
||||
|
||||
btrfs_assert_tree_locked(buf);
|
||||
btrfs_assert_tree_write_locked(buf);
|
||||
|
||||
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
|
||||
trans->transid != fs_info->running_transaction->transid);
|
||||
@@ -2488,7 +2488,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
|
||||
int ret;
|
||||
|
||||
BUG_ON(!path->nodes[level]);
|
||||
btrfs_assert_tree_locked(path->nodes[level]);
|
||||
btrfs_assert_tree_write_locked(path->nodes[level]);
|
||||
lower = path->nodes[level];
|
||||
nritems = btrfs_header_nritems(lower);
|
||||
BUG_ON(slot > nritems);
|
||||
@@ -2828,7 +2828,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
if (slot >= btrfs_header_nritems(upper) - 1)
|
||||
return 1;
|
||||
|
||||
btrfs_assert_tree_locked(path->nodes[1]);
|
||||
btrfs_assert_tree_write_locked(path->nodes[1]);
|
||||
|
||||
right = btrfs_read_node_slot(upper, slot + 1);
|
||||
/*
|
||||
@@ -3066,7 +3066,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
if (right_nritems == 0)
|
||||
return 1;
|
||||
|
||||
btrfs_assert_tree_locked(path->nodes[1]);
|
||||
btrfs_assert_tree_write_locked(path->nodes[1]);
|
||||
|
||||
left = btrfs_read_node_slot(path->nodes[1], slot - 1);
|
||||
/*
|
||||
@@ -3581,40 +3581,6 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function duplicate a item, giving 'new_key' to the new item.
|
||||
* It guarantees both items live in the same tree leaf and the new item
|
||||
* is contiguous with the original item.
|
||||
*
|
||||
* This allows us to split file extent in place, keeping a lock on the
|
||||
* leaf the entire time.
|
||||
*/
|
||||
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_key *new_key)
|
||||
{
|
||||
struct extent_buffer *leaf;
|
||||
int ret;
|
||||
u32 item_size;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
|
||||
ret = setup_leaf_for_split(trans, root, path,
|
||||
item_size + sizeof(struct btrfs_item));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
path->slots[0]++;
|
||||
setup_items_for_insert(root, path, new_key, &item_size, 1);
|
||||
leaf = path->nodes[0];
|
||||
memcpy_extent_buffer(leaf,
|
||||
btrfs_item_ptr_offset(leaf, path->slots[0]),
|
||||
btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
|
||||
item_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* make the item pointed to by the path smaller. new_size indicates
|
||||
* how small to make it, and from_end tells us if we just chop bytes
|
||||
@@ -3786,13 +3752,10 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
|
||||
*
|
||||
* @root: root we are inserting items to
|
||||
* @path: points to the leaf/slot where we are going to insert new items
|
||||
* @cpu_key: array of keys for items to be inserted
|
||||
* @data_size: size of the body of each item we are going to insert
|
||||
* @nr: size of @cpu_key/@data_size arrays
|
||||
* @batch: information about the batch of items to insert
|
||||
*/
|
||||
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
|
||||
const struct btrfs_key *cpu_key, u32 *data_size,
|
||||
int nr)
|
||||
static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
|
||||
const struct btrfs_item_batch *batch)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_item *item;
|
||||
@@ -3804,14 +3767,14 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
|
||||
int slot;
|
||||
struct btrfs_map_token token;
|
||||
u32 total_size;
|
||||
u32 total_data = 0;
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
total_data += data_size[i];
|
||||
total_size = total_data + (nr * sizeof(struct btrfs_item));
|
||||
|
||||
/*
|
||||
* Before anything else, update keys in the parent and other ancestors
|
||||
* if needed, then release the write locks on them, so that other tasks
|
||||
* can use them while we modify the leaf.
|
||||
*/
|
||||
if (path->slots[0] == 0) {
|
||||
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
|
||||
btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]);
|
||||
fixup_low_keys(path, &disk_key, 1);
|
||||
}
|
||||
btrfs_unlock_up_safe(path, 1);
|
||||
@@ -3821,6 +3784,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
|
||||
|
||||
nritems = btrfs_header_nritems(leaf);
|
||||
data_end = leaf_data_end(leaf);
|
||||
total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
|
||||
|
||||
if (btrfs_leaf_free_space(leaf) < total_size) {
|
||||
btrfs_print_leaf(leaf);
|
||||
@@ -3850,31 +3814,32 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
|
||||
item = btrfs_item_nr(i);
|
||||
ioff = btrfs_token_item_offset(&token, item);
|
||||
btrfs_set_token_item_offset(&token, item,
|
||||
ioff - total_data);
|
||||
ioff - batch->total_data_size);
|
||||
}
|
||||
/* shift the items */
|
||||
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
|
||||
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr),
|
||||
btrfs_item_nr_offset(slot),
|
||||
(nritems - slot) * sizeof(struct btrfs_item));
|
||||
|
||||
/* shift the data */
|
||||
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
|
||||
data_end, old_data - data_end);
|
||||
data_end - batch->total_data_size,
|
||||
BTRFS_LEAF_DATA_OFFSET + data_end,
|
||||
old_data - data_end);
|
||||
data_end = old_data;
|
||||
}
|
||||
|
||||
/* setup the item for the new data */
|
||||
for (i = 0; i < nr; i++) {
|
||||
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
|
||||
for (i = 0; i < batch->nr; i++) {
|
||||
btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
|
||||
btrfs_set_item_key(leaf, &disk_key, slot + i);
|
||||
item = btrfs_item_nr(slot + i);
|
||||
data_end -= data_size[i];
|
||||
data_end -= batch->data_sizes[i];
|
||||
btrfs_set_token_item_offset(&token, item, data_end);
|
||||
btrfs_set_token_item_size(&token, item, data_size[i]);
|
||||
btrfs_set_token_item_size(&token, item, batch->data_sizes[i]);
|
||||
}
|
||||
|
||||
btrfs_set_header_nritems(leaf, nritems + nr);
|
||||
btrfs_set_header_nritems(leaf, nritems + batch->nr);
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
||||
if (btrfs_leaf_free_space(leaf) < 0) {
|
||||
@@ -3883,6 +3848,29 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert a new item into a leaf.
|
||||
*
|
||||
* @root: The root of the btree.
|
||||
* @path: A path pointing to the target leaf and slot.
|
||||
* @key: The key of the new item.
|
||||
* @data_size: The size of the data associated with the new key.
|
||||
*/
|
||||
void btrfs_setup_item_for_insert(struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_key *key,
|
||||
u32 data_size)
|
||||
{
|
||||
struct btrfs_item_batch batch;
|
||||
|
||||
batch.keys = key;
|
||||
batch.data_sizes = &data_size;
|
||||
batch.total_data_size = data_size;
|
||||
batch.nr = 1;
|
||||
|
||||
setup_items_for_insert(root, path, &batch);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a key and some data, insert items into the tree.
|
||||
* This does all the path init required, making room in the tree if needed.
|
||||
@@ -3890,20 +3878,14 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
|
||||
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_key *cpu_key, u32 *data_size,
|
||||
int nr)
|
||||
const struct btrfs_item_batch *batch)
|
||||
{
|
||||
int ret = 0;
|
||||
int slot;
|
||||
int i;
|
||||
u32 total_size = 0;
|
||||
u32 total_data = 0;
|
||||
u32 total_size;
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
total_data += data_size[i];
|
||||
|
||||
total_size = total_data + (nr * sizeof(struct btrfs_item));
|
||||
ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
|
||||
total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
|
||||
ret = btrfs_search_slot(trans, root, &batch->keys[0], path, total_size, 1);
|
||||
if (ret == 0)
|
||||
return -EEXIST;
|
||||
if (ret < 0)
|
||||
@@ -3912,7 +3894,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
|
||||
slot = path->slots[0];
|
||||
BUG_ON(slot < 0);
|
||||
|
||||
setup_items_for_insert(root, path, cpu_key, data_size, nr);
|
||||
setup_items_for_insert(root, path, batch);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3943,6 +3925,40 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function duplicates an item, giving 'new_key' to the new item.
|
||||
* It guarantees both items live in the same tree leaf and the new item is
|
||||
* contiguous with the original item.
|
||||
*
|
||||
* This allows us to split a file extent in place, keeping a lock on the leaf
|
||||
* the entire time.
|
||||
*/
|
||||
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_key *new_key)
|
||||
{
|
||||
struct extent_buffer *leaf;
|
||||
int ret;
|
||||
u32 item_size;
|
||||
|
||||
leaf = path->nodes[0];
|
||||
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
|
||||
ret = setup_leaf_for_split(trans, root, path,
|
||||
item_size + sizeof(struct btrfs_item));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
path->slots[0]++;
|
||||
btrfs_setup_item_for_insert(root, path, new_key, item_size);
|
||||
leaf = path->nodes[0];
|
||||
memcpy_extent_buffer(leaf,
|
||||
btrfs_item_ptr_offset(leaf, path->slots[0]),
|
||||
btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
|
||||
item_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* delete the pointer from a given node.
|
||||
*
|
||||
|
||||
@@ -48,6 +48,7 @@ extern struct kmem_cache *btrfs_free_space_cachep;
|
||||
extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
|
||||
struct btrfs_ordered_sum;
|
||||
struct btrfs_ref;
|
||||
struct btrfs_bio;
|
||||
|
||||
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
|
||||
|
||||
@@ -217,6 +218,9 @@ struct btrfs_root_backup {
|
||||
u8 unused_8[10];
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_SUPER_INFO_OFFSET SZ_64K
|
||||
#define BTRFS_SUPER_INFO_SIZE 4096
|
||||
|
||||
/*
|
||||
* the super block basically lists the main trees of the FS
|
||||
* it currently lacks any block count etc etc
|
||||
@@ -269,7 +273,11 @@ struct btrfs_super_block {
|
||||
__le64 reserved[28];
|
||||
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
|
||||
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
|
||||
|
||||
/* Padded to 4096 bytes */
|
||||
u8 padding[565];
|
||||
} __attribute__ ((__packed__));
|
||||
static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
|
||||
|
||||
/*
|
||||
* Compat flags that we support. If any incompat flags are set other than the
|
||||
@@ -899,6 +907,7 @@ struct btrfs_fs_info {
|
||||
struct btrfs_workqueue *scrub_workers;
|
||||
struct btrfs_workqueue *scrub_wr_completion_workers;
|
||||
struct btrfs_workqueue *scrub_parity_workers;
|
||||
struct btrfs_subpage_info *subpage_info;
|
||||
|
||||
struct btrfs_discard_ctl discard_ctl;
|
||||
|
||||
@@ -1017,6 +1026,16 @@ struct btrfs_fs_info {
|
||||
spinlock_t treelog_bg_lock;
|
||||
u64 treelog_bg;
|
||||
|
||||
/*
|
||||
* Start of the dedicated data relocation block group, protected by
|
||||
* relocation_bg_lock.
|
||||
*/
|
||||
spinlock_t relocation_bg_lock;
|
||||
u64 data_reloc_bg;
|
||||
|
||||
spinlock_t zone_active_bgs_lock;
|
||||
struct list_head zone_active_bgs;
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
spinlock_t ref_verify_lock;
|
||||
struct rb_root block_tree;
|
||||
@@ -2885,16 +2904,42 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
|
||||
return btrfs_del_items(trans, root, path, path->slots[0], 1);
|
||||
}
|
||||
|
||||
void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
|
||||
const struct btrfs_key *cpu_key, u32 *data_size,
|
||||
int nr);
|
||||
/*
|
||||
* Describes a batch of items to insert in a btree. This is used by
|
||||
* btrfs_insert_empty_items().
|
||||
*/
|
||||
struct btrfs_item_batch {
|
||||
/*
|
||||
* Pointer to an array containing the keys of the items to insert (in
|
||||
* sorted order).
|
||||
*/
|
||||
const struct btrfs_key *keys;
|
||||
/* Pointer to an array containing the data size for each item to insert. */
|
||||
const u32 *data_sizes;
|
||||
/*
|
||||
* The sum of data sizes for all items. The caller can compute this while
|
||||
* setting up the data_sizes array, so it ends up being more efficient
|
||||
* than having btrfs_insert_empty_items() or setup_items_for_insert()
|
||||
* doing it, as it would avoid an extra loop over a potentially large
|
||||
* array, and in the case of setup_items_for_insert(), we would be doing
|
||||
* it while holding a write lock on a leaf and often on upper level nodes
|
||||
* too, unnecessarily increasing the size of a critical section.
|
||||
*/
|
||||
u32 total_data_size;
|
||||
/* Size of the keys and data_sizes arrays (number of items in the batch). */
|
||||
int nr;
|
||||
};
|
||||
|
||||
void btrfs_setup_item_for_insert(struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_key *key,
|
||||
u32 data_size);
|
||||
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
const struct btrfs_key *key, void *data, u32 data_size);
|
||||
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_key *cpu_key, u32 *data_size,
|
||||
int nr);
|
||||
const struct btrfs_item_batch *batch);
|
||||
|
||||
static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
@@ -2902,7 +2947,14 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
|
||||
const struct btrfs_key *key,
|
||||
u32 data_size)
|
||||
{
|
||||
return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
|
||||
struct btrfs_item_batch batch;
|
||||
|
||||
batch.keys = key;
|
||||
batch.data_sizes = &data_size;
|
||||
batch.total_data_size = data_size;
|
||||
batch.nr = 1;
|
||||
|
||||
return btrfs_insert_empty_items(trans, root, path, &batch);
|
||||
}
|
||||
|
||||
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
|
||||
@@ -3129,8 +3181,9 @@ u64 btrfs_file_extent_end(const struct btrfs_path *path);
|
||||
/* inode.c */
|
||||
blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags);
|
||||
unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
|
||||
struct page *page, u64 start, u64 end);
|
||||
unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
|
||||
u32 bio_offset, struct page *page,
|
||||
u64 start, u64 end);
|
||||
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
|
||||
u64 start, u64 len);
|
||||
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
|
||||
@@ -3142,7 +3195,6 @@ void __btrfs_del_delalloc_inode(struct btrfs_root *root,
|
||||
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
|
||||
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
|
||||
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_inode *dir, struct btrfs_inode *inode,
|
||||
const char *name, int name_len);
|
||||
int btrfs_add_link(struct btrfs_trans_handle *trans,
|
||||
@@ -3174,8 +3226,6 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
|
||||
struct extent_state *other);
|
||||
void btrfs_split_delalloc_extent(struct inode *inode,
|
||||
struct extent_state *orig, u64 split);
|
||||
int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
|
||||
unsigned long bio_flags);
|
||||
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
|
||||
int btrfs_readpage(struct file *file, struct page *page);
|
||||
@@ -3242,9 +3292,9 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
|
||||
int btrfs_ioctl_get_supported_features(void __user *arg);
|
||||
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
|
||||
int __pure btrfs_is_empty_uuid(u8 *uuid);
|
||||
int btrfs_defrag_file(struct inode *inode, struct file *file,
|
||||
int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
|
||||
struct btrfs_ioctl_defrag_range_args *range,
|
||||
u64 newer_than, unsigned long max_pages);
|
||||
u64 newer_than, unsigned long max_to_defrag);
|
||||
void btrfs_get_block_group_info(struct list_head *groups_list,
|
||||
struct btrfs_ioctl_space_info *space);
|
||||
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
|
||||
@@ -3563,6 +3613,9 @@ do { \
|
||||
(errno), fmt, ##args); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Evaluates to true when the BTRFS_FS_STATE_ERROR bit is set in
 * (fs_info)->fs_state. The bit test is wrapped in unlikely(), so callers
 * do not need to add their own branch hint.
 */
#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
|
||||
&(fs_info)->fs_state)))
|
||||
|
||||
__printf(5, 6)
|
||||
__cold
|
||||
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
|
||||
@@ -3842,6 +3895,11 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
|
||||
return fs_info->zoned != 0;
|
||||
}
|
||||
|
||||
static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
|
||||
{
|
||||
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
|
||||
}
|
||||
|
||||
/*
|
||||
* We use page status Private2 to indicate there is an ordered extent with
|
||||
* unfinished IO.
|
||||
|
||||
@@ -679,19 +679,18 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_delayed_item *first_item)
|
||||
{
|
||||
LIST_HEAD(batch);
|
||||
LIST_HEAD(item_list);
|
||||
struct btrfs_delayed_item *curr;
|
||||
struct btrfs_delayed_item *next;
|
||||
const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info);
|
||||
struct btrfs_item_batch batch;
|
||||
int total_size;
|
||||
int nitems;
|
||||
char *ins_data = NULL;
|
||||
struct btrfs_key *ins_keys;
|
||||
u32 *ins_sizes;
|
||||
int ret;
|
||||
|
||||
list_add_tail(&first_item->tree_list, &batch);
|
||||
nitems = 1;
|
||||
list_add_tail(&first_item->tree_list, &item_list);
|
||||
batch.total_data_size = first_item->data_len;
|
||||
batch.nr = 1;
|
||||
total_size = first_item->data_len + sizeof(struct btrfs_item);
|
||||
curr = first_item;
|
||||
|
||||
@@ -706,39 +705,43 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
if (total_size + next_size > max_size)
|
||||
break;
|
||||
|
||||
list_add_tail(&next->tree_list, &batch);
|
||||
nitems++;
|
||||
list_add_tail(&next->tree_list, &item_list);
|
||||
batch.nr++;
|
||||
total_size += next_size;
|
||||
batch.total_data_size += next->data_len;
|
||||
curr = next;
|
||||
}
|
||||
|
||||
if (nitems == 1) {
|
||||
ins_keys = &first_item->key;
|
||||
ins_sizes = &first_item->data_len;
|
||||
if (batch.nr == 1) {
|
||||
batch.keys = &first_item->key;
|
||||
batch.data_sizes = &first_item->data_len;
|
||||
} else {
|
||||
struct btrfs_key *ins_keys;
|
||||
u32 *ins_sizes;
|
||||
int i = 0;
|
||||
|
||||
ins_data = kmalloc(nitems * sizeof(u32) +
|
||||
nitems * sizeof(struct btrfs_key), GFP_NOFS);
|
||||
ins_data = kmalloc(batch.nr * sizeof(u32) +
|
||||
batch.nr * sizeof(struct btrfs_key), GFP_NOFS);
|
||||
if (!ins_data) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
ins_sizes = (u32 *)ins_data;
|
||||
ins_keys = (struct btrfs_key *)(ins_data + nitems * sizeof(u32));
|
||||
list_for_each_entry(curr, &batch, tree_list) {
|
||||
ins_keys = (struct btrfs_key *)(ins_data + batch.nr * sizeof(u32));
|
||||
batch.keys = ins_keys;
|
||||
batch.data_sizes = ins_sizes;
|
||||
list_for_each_entry(curr, &item_list, tree_list) {
|
||||
ins_keys[i] = curr->key;
|
||||
ins_sizes[i] = curr->data_len;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
ret = btrfs_insert_empty_items(trans, root, path, ins_keys, ins_sizes,
|
||||
nitems);
|
||||
ret = btrfs_insert_empty_items(trans, root, path, &batch);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
list_for_each_entry(curr, &batch, tree_list) {
|
||||
list_for_each_entry(curr, &item_list, tree_list) {
|
||||
char *data_ptr;
|
||||
|
||||
data_ptr = btrfs_item_ptr(path->nodes[0], path->slots[0], char);
|
||||
@@ -754,7 +757,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
btrfs_release_path(path);
|
||||
|
||||
list_for_each_entry_safe(curr, next, &batch, tree_list) {
|
||||
list_for_each_entry_safe(curr, next, &item_list, tree_list) {
|
||||
list_del(&curr->tree_list);
|
||||
btrfs_delayed_item_release_metadata(root, curr);
|
||||
btrfs_release_delayed_item(curr);
|
||||
|
||||
@@ -906,7 +906,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
|
||||
u64 parent = generic_ref->parent;
|
||||
u8 ref_type;
|
||||
|
||||
is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);
|
||||
is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID);
|
||||
|
||||
ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
|
||||
BUG_ON(extent_op && extent_op->is_data);
|
||||
@@ -921,8 +921,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
|
||||
is_fstree(generic_ref->real_root) &&
|
||||
is_fstree(generic_ref->tree_ref.root) &&
|
||||
!generic_ref->skip_qgroup) {
|
||||
record = kzalloc(sizeof(*record), GFP_NOFS);
|
||||
if (!record) {
|
||||
@@ -938,14 +936,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
|
||||
ref_type = BTRFS_TREE_BLOCK_REF_KEY;
|
||||
|
||||
init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
|
||||
generic_ref->tree_ref.root, action, ref_type);
|
||||
ref->root = generic_ref->tree_ref.root;
|
||||
generic_ref->tree_ref.owning_root, action,
|
||||
ref_type);
|
||||
ref->root = generic_ref->tree_ref.owning_root;
|
||||
ref->parent = parent;
|
||||
ref->level = level;
|
||||
|
||||
init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
|
||||
generic_ref->tree_ref.root, 0, action, false,
|
||||
is_system);
|
||||
generic_ref->tree_ref.owning_root, 0, action,
|
||||
false, is_system);
|
||||
head_ref->extent_op = extent_op;
|
||||
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
@@ -997,7 +996,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr = generic_ref->bytenr;
|
||||
u64 num_bytes = generic_ref->len;
|
||||
u64 parent = generic_ref->parent;
|
||||
u64 ref_root = generic_ref->data_ref.ref_root;
|
||||
u64 ref_root = generic_ref->data_ref.owning_root;
|
||||
u64 owner = generic_ref->data_ref.ino;
|
||||
u64 offset = generic_ref->data_ref.offset;
|
||||
u8 ref_type;
|
||||
@@ -1026,8 +1025,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
|
||||
is_fstree(ref_root) &&
|
||||
is_fstree(generic_ref->real_root) &&
|
||||
!generic_ref->skip_qgroup) {
|
||||
record = kzalloc(sizeof(*record), GFP_NOFS);
|
||||
if (!record) {
|
||||
|
||||
@@ -186,8 +186,8 @@ enum btrfs_ref_type {
|
||||
struct btrfs_data_ref {
|
||||
/* For EXTENT_DATA_REF */
|
||||
|
||||
/* Root which refers to this data extent */
|
||||
u64 ref_root;
|
||||
/* Original root this data extent belongs to */
|
||||
u64 owning_root;
|
||||
|
||||
/* Inode which refers to this data extent */
|
||||
u64 ino;
|
||||
@@ -210,11 +210,11 @@ struct btrfs_tree_ref {
|
||||
int level;
|
||||
|
||||
/*
|
||||
* Root which refers to this tree block.
|
||||
* Root which owns this tree block.
|
||||
*
|
||||
* For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
|
||||
*/
|
||||
u64 root;
|
||||
u64 owning_root;
|
||||
|
||||
/* For non-skinny metadata, no special member needed */
|
||||
};
|
||||
@@ -231,17 +231,10 @@ struct btrfs_ref {
|
||||
*/
|
||||
bool skip_qgroup;
|
||||
|
||||
/*
|
||||
* Optional. For which root is this modification.
|
||||
* Mostly used for qgroup optimization.
|
||||
*
|
||||
* When unset, data/tree ref init code will populate it.
|
||||
* In certain cases, we're modifying reference for a different root.
|
||||
* E.g. COW fs tree blocks for balance.
|
||||
* In that case, tree_ref::root will be fs tree, but we're doing this
|
||||
* for reloc tree, then we should set @real_root to reloc tree.
|
||||
*/
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
/* Through which root is this modification. */
|
||||
u64 real_root;
|
||||
#endif
|
||||
u64 bytenr;
|
||||
u64 len;
|
||||
|
||||
@@ -271,26 +264,40 @@ static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
|
||||
}
|
||||
|
||||
static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
|
||||
int level, u64 root)
|
||||
int level, u64 root, u64 mod_root, bool skip_qgroup)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
/* If @real_root not set, use @root as fallback */
|
||||
if (!generic_ref->real_root)
|
||||
generic_ref->real_root = root;
|
||||
generic_ref->real_root = mod_root ?: root;
|
||||
#endif
|
||||
generic_ref->tree_ref.level = level;
|
||||
generic_ref->tree_ref.root = root;
|
||||
generic_ref->tree_ref.owning_root = root;
|
||||
generic_ref->type = BTRFS_REF_METADATA;
|
||||
if (skip_qgroup || !(is_fstree(root) &&
|
||||
(!mod_root || is_fstree(mod_root))))
|
||||
generic_ref->skip_qgroup = true;
|
||||
else
|
||||
generic_ref->skip_qgroup = false;
|
||||
|
||||
}
|
||||
|
||||
static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
|
||||
u64 ref_root, u64 ino, u64 offset)
|
||||
u64 ref_root, u64 ino, u64 offset, u64 mod_root,
|
||||
bool skip_qgroup)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
/* If @real_root not set, use @root as fallback */
|
||||
if (!generic_ref->real_root)
|
||||
generic_ref->real_root = ref_root;
|
||||
generic_ref->data_ref.ref_root = ref_root;
|
||||
generic_ref->real_root = mod_root ?: ref_root;
|
||||
#endif
|
||||
generic_ref->data_ref.owning_root = ref_root;
|
||||
generic_ref->data_ref.ino = ino;
|
||||
generic_ref->data_ref.offset = offset;
|
||||
generic_ref->type = BTRFS_REF_DATA;
|
||||
if (skip_qgroup || !(is_fstree(ref_root) &&
|
||||
(!mod_root || is_fstree(mod_root))))
|
||||
generic_ref->skip_qgroup = true;
|
||||
else
|
||||
generic_ref->skip_qgroup = false;
|
||||
}
|
||||
|
||||
static inline struct btrfs_delayed_extent_op *
|
||||
|
||||
@@ -70,6 +70,7 @@ static int btrfs_dev_replace_kthread(void *data);
|
||||
|
||||
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_dev_lookup_args args = { .devid = BTRFS_DEV_REPLACE_DEVID };
|
||||
struct btrfs_key key;
|
||||
struct btrfs_root *dev_root = fs_info->dev_root;
|
||||
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
|
||||
@@ -100,8 +101,7 @@ no_valid_dev_replace_entry_found:
|
||||
* We don't have a replace item or it's corrupted. If there is
|
||||
* a replace target, fail the mount.
|
||||
*/
|
||||
if (btrfs_find_device(fs_info->fs_devices,
|
||||
BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
|
||||
if (btrfs_find_device(fs_info->fs_devices, &args)) {
|
||||
btrfs_err(fs_info,
|
||||
"found replace target device without a valid replace item");
|
||||
ret = -EUCLEAN;
|
||||
@@ -163,8 +163,7 @@ no_valid_dev_replace_entry_found:
|
||||
* We don't have an active replace item but if there is a
|
||||
* replace target, fail the mount.
|
||||
*/
|
||||
if (btrfs_find_device(fs_info->fs_devices,
|
||||
BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
|
||||
if (btrfs_find_device(fs_info->fs_devices, &args)) {
|
||||
btrfs_err(fs_info,
|
||||
"replace devid present without an active replace item");
|
||||
ret = -EUCLEAN;
|
||||
@@ -175,11 +174,10 @@ no_valid_dev_replace_entry_found:
|
||||
break;
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
|
||||
dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices,
|
||||
src_devid, NULL, NULL);
|
||||
dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices,
|
||||
BTRFS_DEV_REPLACE_DEVID,
|
||||
NULL, NULL);
|
||||
dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, &args);
|
||||
args.devid = src_devid;
|
||||
dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, &args);
|
||||
|
||||
/*
|
||||
* allow 'btrfs dev replace_cancel' if src/tgt device is
|
||||
* missing
|
||||
|
||||
@@ -683,7 +683,7 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
|
||||
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
|
||||
struct page *page, u64 start, u64 end,
|
||||
int mirror)
|
||||
{
|
||||
@@ -1036,7 +1036,7 @@ static int btree_set_page_dirty(struct page *page)
|
||||
BUG_ON(!eb);
|
||||
BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
|
||||
BUG_ON(!atomic_read(&eb->refs));
|
||||
btrfs_assert_tree_locked(eb);
|
||||
btrfs_assert_tree_write_locked(eb);
|
||||
return __set_page_dirty_nobuffers(page);
|
||||
}
|
||||
ASSERT(PagePrivate(page) && page->private);
|
||||
@@ -1061,7 +1061,7 @@ static int btree_set_page_dirty(struct page *page)
|
||||
ASSERT(eb);
|
||||
ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
|
||||
ASSERT(atomic_read(&eb->refs));
|
||||
btrfs_assert_tree_locked(eb);
|
||||
btrfs_assert_tree_write_locked(eb);
|
||||
free_extent_buffer(eb);
|
||||
|
||||
cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
|
||||
@@ -1125,7 +1125,7 @@ void btrfs_clean_tree_block(struct extent_buffer *buf)
|
||||
struct btrfs_fs_info *fs_info = buf->fs_info;
|
||||
if (btrfs_header_generation(buf) ==
|
||||
fs_info->running_transaction->transid) {
|
||||
btrfs_assert_tree_locked(buf);
|
||||
btrfs_assert_tree_write_locked(buf);
|
||||
|
||||
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
|
||||
percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
|
||||
@@ -1500,7 +1500,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
|
||||
goto fail;
|
||||
|
||||
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
|
||||
root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
|
||||
!btrfs_is_data_reloc_root(root)) {
|
||||
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
|
||||
btrfs_check_and_init_root_item(&root->root_item);
|
||||
}
|
||||
@@ -1644,6 +1644,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
|
||||
btrfs_extent_buffer_leak_debug_check(fs_info);
|
||||
kfree(fs_info->super_copy);
|
||||
kfree(fs_info->super_for_commit);
|
||||
kfree(fs_info->subpage_info);
|
||||
kvfree(fs_info);
|
||||
}
|
||||
|
||||
@@ -1953,8 +1954,7 @@ sleep:
|
||||
wake_up_process(fs_info->cleaner_kthread);
|
||||
mutex_unlock(&fs_info->transaction_kthread_mutex);
|
||||
|
||||
if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
|
||||
&fs_info->fs_state)))
|
||||
if (BTRFS_FS_ERROR(fs_info))
|
||||
btrfs_cleanup_transaction(fs_info);
|
||||
if (!kthread_should_stop() &&
|
||||
(!btrfs_transaction_blocked(fs_info) ||
|
||||
@@ -2592,8 +2592,7 @@ static int validate_super(struct btrfs_fs_info *fs_info,
|
||||
|
||||
/*
|
||||
* For 4K page size, we only support 4K sector size.
|
||||
* For 64K page size, we support read-write for 64K sector size, and
|
||||
* read-only for 4K sector size.
|
||||
* For 64K page size, we support 64K and 4K sector sizes.
|
||||
*/
|
||||
if ((PAGE_SIZE == SZ_4K && sectorsize != PAGE_SIZE) ||
|
||||
(PAGE_SIZE == SZ_64K && (sectorsize != SZ_4K &&
|
||||
@@ -2883,6 +2882,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
spin_lock_init(&fs_info->buffer_lock);
|
||||
spin_lock_init(&fs_info->unused_bgs_lock);
|
||||
spin_lock_init(&fs_info->treelog_bg_lock);
|
||||
spin_lock_init(&fs_info->zone_active_bgs_lock);
|
||||
spin_lock_init(&fs_info->relocation_bg_lock);
|
||||
rwlock_init(&fs_info->tree_mod_log_lock);
|
||||
mutex_init(&fs_info->unused_bg_unpin_mutex);
|
||||
mutex_init(&fs_info->reclaim_bgs_lock);
|
||||
@@ -2896,6 +2897,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
|
||||
INIT_LIST_HEAD(&fs_info->unused_bgs);
|
||||
INIT_LIST_HEAD(&fs_info->reclaim_bgs);
|
||||
INIT_LIST_HEAD(&fs_info->zone_active_bgs);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
INIT_LIST_HEAD(&fs_info->allocated_roots);
|
||||
INIT_LIST_HEAD(&fs_info->allocated_ebs);
|
||||
@@ -3228,12 +3230,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
|
||||
btrfs_init_btree_inode(fs_info);
|
||||
|
||||
invalidate_bdev(fs_devices->latest_bdev);
|
||||
invalidate_bdev(fs_devices->latest_dev->bdev);
|
||||
|
||||
/*
|
||||
* Read super block and check the signature bytes only
|
||||
*/
|
||||
disk_super = btrfs_read_dev_super(fs_devices->latest_bdev);
|
||||
disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
|
||||
if (IS_ERR(disk_super)) {
|
||||
err = PTR_ERR(disk_super);
|
||||
goto fail_alloc;
|
||||
@@ -3392,12 +3394,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
goto fail_alloc;
|
||||
}
|
||||
|
||||
if (sectorsize != PAGE_SIZE) {
|
||||
if (sectorsize < PAGE_SIZE) {
|
||||
struct btrfs_subpage_info *subpage_info;
|
||||
|
||||
btrfs_warn(fs_info,
|
||||
"read-write for sector size %u with page size %lu is experimental",
|
||||
sectorsize, PAGE_SIZE);
|
||||
}
|
||||
if (sectorsize != PAGE_SIZE) {
|
||||
if (btrfs_super_incompat_flags(fs_info->super_copy) &
|
||||
BTRFS_FEATURE_INCOMPAT_RAID56) {
|
||||
btrfs_err(fs_info,
|
||||
@@ -3406,6 +3408,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
}
|
||||
subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
|
||||
if (!subpage_info)
|
||||
goto fail_alloc;
|
||||
btrfs_init_subpage_info(subpage_info, sectorsize);
|
||||
fs_info->subpage_info = subpage_info;
|
||||
}
|
||||
|
||||
ret = btrfs_init_workqueues(fs_info, fs_devices);
|
||||
@@ -3465,7 +3472,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
* below in btrfs_init_dev_replace().
|
||||
*/
|
||||
btrfs_free_extra_devids(fs_devices);
|
||||
if (!fs_devices->latest_bdev) {
|
||||
if (!fs_devices->latest_dev->bdev) {
|
||||
btrfs_err(fs_info, "failed to read devices");
|
||||
goto fail_tree_roots;
|
||||
}
|
||||
@@ -3556,7 +3563,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
goto fail_sysfs;
|
||||
}
|
||||
|
||||
if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
|
||||
if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
|
||||
!btrfs_check_rw_degradable(fs_info, NULL)) {
|
||||
btrfs_warn(fs_info,
|
||||
"writable mount is not allowed due to too many missing devices");
|
||||
goto fail_sysfs;
|
||||
@@ -3881,7 +3889,9 @@ static int write_dev_supers(struct btrfs_device *device,
|
||||
bio->bi_opf |= REQ_FUA;
|
||||
|
||||
btrfsic_submit_bio(bio);
|
||||
btrfs_advance_sb_log(device, i);
|
||||
|
||||
if (btrfs_advance_sb_log(device, i))
|
||||
errors++;
|
||||
}
|
||||
return errors < i ? 0 : -1;
|
||||
}
|
||||
@@ -4221,7 +4231,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
||||
drop_ref = true;
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
|
||||
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
|
||||
if (BTRFS_FS_ERROR(fs_info)) {
|
||||
ASSERT(root->log_root == NULL);
|
||||
if (root->reloc_root) {
|
||||
btrfs_put_root(root->reloc_root);
|
||||
@@ -4372,8 +4382,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
|
||||
btrfs_err(fs_info, "commit super ret %d", ret);
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state) ||
|
||||
test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state))
|
||||
if (BTRFS_FS_ERROR(fs_info))
|
||||
btrfs_error_commit_super(fs_info);
|
||||
|
||||
kthread_stop(fs_info->transaction_kthread);
|
||||
@@ -4470,7 +4479,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
|
||||
if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
|
||||
return;
|
||||
#endif
|
||||
btrfs_assert_tree_locked(buf);
|
||||
btrfs_assert_tree_write_locked(buf);
|
||||
if (transid != fs_info->generation)
|
||||
WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n",
|
||||
buf->start, transid, fs_info->generation);
|
||||
|
||||
@@ -6,9 +6,6 @@
|
||||
#ifndef BTRFS_DISK_IO_H
|
||||
#define BTRFS_DISK_IO_H
|
||||
|
||||
#define BTRFS_SUPER_INFO_OFFSET SZ_64K
|
||||
#define BTRFS_SUPER_INFO_SIZE 4096
|
||||
|
||||
#define BTRFS_SUPER_MIRROR_MAX 3
|
||||
#define BTRFS_SUPER_MIRROR_SHIFT 12
|
||||
|
||||
@@ -81,7 +78,7 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
|
||||
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
|
||||
struct page *page, u64 start, u64 end,
|
||||
int mirror);
|
||||
blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
|
||||
|
||||
@@ -1266,7 +1266,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int do_discard_extent(struct btrfs_bio_stripe *stripe, u64 *bytes)
|
||||
static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
|
||||
{
|
||||
struct btrfs_device *dev = stripe->dev;
|
||||
struct btrfs_fs_info *fs_info = dev->fs_info;
|
||||
@@ -1313,22 +1313,21 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 discarded_bytes = 0;
|
||||
u64 end = bytenr + num_bytes;
|
||||
u64 cur = bytenr;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
|
||||
/*
|
||||
* Avoid races with device replace and make sure our bbio has devices
|
||||
* Avoid races with device replace and make sure our bioc has devices
|
||||
* associated to its stripes that don't go away while we are discarding.
|
||||
*/
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
while (cur < end) {
|
||||
struct btrfs_bio_stripe *stripe;
|
||||
struct btrfs_io_stripe *stripe;
|
||||
int i;
|
||||
|
||||
num_bytes = end - cur;
|
||||
/* Tell the block device(s) that the sectors can be discarded */
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
|
||||
&num_bytes, &bbio, 0);
|
||||
&num_bytes, &bioc, 0);
|
||||
/*
|
||||
* Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
|
||||
* -EOPNOTSUPP. For any such error, @num_bytes is not updated,
|
||||
@@ -1337,8 +1336,8 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
stripe = bbio->stripes;
|
||||
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
|
||||
stripe = bioc->stripes;
|
||||
for (i = 0; i < bioc->num_stripes; i++, stripe++) {
|
||||
u64 bytes;
|
||||
struct btrfs_device *device = stripe->dev;
|
||||
|
||||
@@ -1361,7 +1360,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
* And since there are two loops, explicitly
|
||||
* go to out to avoid confusion.
|
||||
*/
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1372,7 +1371,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
*/
|
||||
ret = 0;
|
||||
}
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
cur += num_bytes;
|
||||
}
|
||||
out:
|
||||
@@ -1397,7 +1396,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
|
||||
ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
|
||||
generic_ref->action);
|
||||
BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
|
||||
generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
|
||||
generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
|
||||
|
||||
if (generic_ref->type == BTRFS_REF_METADATA)
|
||||
ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
|
||||
@@ -2376,7 +2375,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
|
||||
if (btrfs_is_data_reloc_root(root))
|
||||
WARN_ON(ret > 0);
|
||||
return ret;
|
||||
}
|
||||
@@ -2438,10 +2437,9 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
|
||||
key.offset -= btrfs_file_extent_offset(buf, fi);
|
||||
btrfs_init_generic_ref(&generic_ref, action, bytenr,
|
||||
num_bytes, parent);
|
||||
generic_ref.real_root = root->root_key.objectid;
|
||||
btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
|
||||
key.offset);
|
||||
generic_ref.skip_qgroup = for_reloc;
|
||||
key.offset, root->root_key.objectid,
|
||||
for_reloc);
|
||||
if (inc)
|
||||
ret = btrfs_inc_extent_ref(trans, &generic_ref);
|
||||
else
|
||||
@@ -2453,9 +2451,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
|
||||
num_bytes = fs_info->nodesize;
|
||||
btrfs_init_generic_ref(&generic_ref, action, bytenr,
|
||||
num_bytes, parent);
|
||||
generic_ref.real_root = root->root_key.objectid;
|
||||
btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
|
||||
generic_ref.skip_qgroup = for_reloc;
|
||||
btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
|
||||
root->root_key.objectid, for_reloc);
|
||||
if (inc)
|
||||
ret = btrfs_inc_extent_ref(trans, &generic_ref);
|
||||
else
|
||||
@@ -3196,7 +3193,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
|
||||
ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
@@ -3289,7 +3286,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
|
||||
buf->start, buf->len, parent);
|
||||
btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
|
||||
root->root_key.objectid);
|
||||
root->root_key.objectid, 0, false);
|
||||
|
||||
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
|
||||
btrfs_ref_tree_mod(fs_info, &generic_ref);
|
||||
@@ -3373,9 +3370,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
|
||||
* tree, just update pinning info and exit early.
|
||||
*/
|
||||
if ((ref->type == BTRFS_REF_METADATA &&
|
||||
ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
|
||||
ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
|
||||
(ref->type == BTRFS_REF_DATA &&
|
||||
ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
|
||||
ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
|
||||
/* unlocks the pinned mutex */
|
||||
btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
|
||||
ret = 0;
|
||||
@@ -3386,9 +3383,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
|
||||
}
|
||||
|
||||
if (!((ref->type == BTRFS_REF_METADATA &&
|
||||
ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
|
||||
ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
|
||||
(ref->type == BTRFS_REF_DATA &&
|
||||
ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
|
||||
ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
|
||||
btrfs_ref_tree_mod(fs_info, ref);
|
||||
|
||||
return ret;
|
||||
@@ -3476,7 +3473,9 @@ enum btrfs_extent_allocation_policy {
|
||||
*/
|
||||
struct find_free_extent_ctl {
|
||||
/* Basic allocation info */
|
||||
u64 ram_bytes;
|
||||
u64 num_bytes;
|
||||
u64 min_alloc_size;
|
||||
u64 empty_size;
|
||||
u64 flags;
|
||||
int delalloc;
|
||||
@@ -3495,6 +3494,9 @@ struct find_free_extent_ctl {
|
||||
/* Allocation is called for tree-log */
|
||||
bool for_treelog;
|
||||
|
||||
/* Allocation is called for data relocation */
|
||||
bool for_data_reloc;
|
||||
|
||||
/* RAID index, converted from flags */
|
||||
int index;
|
||||
|
||||
@@ -3756,8 +3758,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
u64 avail;
|
||||
u64 bytenr = block_group->start;
|
||||
u64 log_bytenr;
|
||||
u64 data_reloc_bytenr;
|
||||
int ret = 0;
|
||||
bool skip;
|
||||
bool skip = false;
|
||||
|
||||
ASSERT(btrfs_is_zoned(block_group->fs_info));
|
||||
|
||||
@@ -3767,19 +3770,49 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
*/
|
||||
spin_lock(&fs_info->treelog_bg_lock);
|
||||
log_bytenr = fs_info->treelog_bg;
|
||||
skip = log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
|
||||
(!ffe_ctl->for_treelog && bytenr == log_bytenr));
|
||||
if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
|
||||
(!ffe_ctl->for_treelog && bytenr == log_bytenr)))
|
||||
skip = true;
|
||||
spin_unlock(&fs_info->treelog_bg_lock);
|
||||
if (skip)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Do not allow non-relocation blocks in the dedicated relocation block
|
||||
* group, and vice versa.
|
||||
*/
|
||||
spin_lock(&fs_info->relocation_bg_lock);
|
||||
data_reloc_bytenr = fs_info->data_reloc_bg;
|
||||
if (data_reloc_bytenr &&
|
||||
((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
|
||||
(!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
|
||||
skip = true;
|
||||
spin_unlock(&fs_info->relocation_bg_lock);
|
||||
if (skip)
|
||||
return 1;
|
||||
/* Check RO and no space case before trying to activate it */
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->ro ||
|
||||
block_group->alloc_offset == block_group->zone_capacity) {
|
||||
spin_unlock(&block_group->lock);
|
||||
return 1;
|
||||
}
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
if (!btrfs_zone_activate(block_group))
|
||||
return 1;
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
spin_lock(&block_group->lock);
|
||||
spin_lock(&fs_info->treelog_bg_lock);
|
||||
spin_lock(&fs_info->relocation_bg_lock);
|
||||
|
||||
ASSERT(!ffe_ctl->for_treelog ||
|
||||
block_group->start == fs_info->treelog_bg ||
|
||||
fs_info->treelog_bg == 0);
|
||||
ASSERT(!ffe_ctl->for_data_reloc ||
|
||||
block_group->start == fs_info->data_reloc_bg ||
|
||||
fs_info->data_reloc_bg == 0);
|
||||
|
||||
if (block_group->ro) {
|
||||
ret = 1;
|
||||
@@ -3796,7 +3829,18 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
goto out;
|
||||
}
|
||||
|
||||
avail = block_group->length - block_group->alloc_offset;
|
||||
/*
|
||||
* Do not allow currently used block group to be the data relocation
|
||||
* dedicated block group.
|
||||
*/
|
||||
if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
|
||||
(block_group->used || block_group->reserved)) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
|
||||
avail = block_group->zone_capacity - block_group->alloc_offset;
|
||||
if (avail < num_bytes) {
|
||||
if (ffe_ctl->max_extent_size < avail) {
|
||||
/*
|
||||
@@ -3813,6 +3857,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
|
||||
fs_info->treelog_bg = block_group->start;
|
||||
|
||||
if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
|
||||
fs_info->data_reloc_bg = block_group->start;
|
||||
|
||||
ffe_ctl->found_offset = start + block_group->alloc_offset;
|
||||
block_group->alloc_offset += num_bytes;
|
||||
spin_lock(&ctl->tree_lock);
|
||||
@@ -3829,6 +3876,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
out:
|
||||
if (ret && ffe_ctl->for_treelog)
|
||||
fs_info->treelog_bg = 0;
|
||||
if (ret && ffe_ctl->for_data_reloc)
|
||||
fs_info->data_reloc_bg = 0;
|
||||
spin_unlock(&fs_info->relocation_bg_lock);
|
||||
spin_unlock(&fs_info->treelog_bg_lock);
|
||||
spin_unlock(&block_group->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
@@ -3932,18 +3982,30 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
|
||||
ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
|
||||
ffe_ctl->orig_have_caching_bg = true;
|
||||
|
||||
if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
|
||||
ffe_ctl->have_caching_bg)
|
||||
return 1;
|
||||
|
||||
if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
|
||||
return 1;
|
||||
|
||||
if (ins->objectid) {
|
||||
found_extent(ffe_ctl, ins);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
|
||||
!btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->index)) {
|
||||
/*
|
||||
* If we have enough free space left in an already active block
|
||||
* group and we can't activate any other zone now, retry the
|
||||
* active ones with a smaller allocation size. Returning early
|
||||
* from here will tell btrfs_reserve_extent() to halve the
|
||||
* size.
|
||||
*/
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
|
||||
return 1;
|
||||
|
||||
ffe_ctl->index++;
|
||||
if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
|
||||
* caching kthreads as we move along
|
||||
@@ -4085,6 +4147,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
|
||||
ffe_ctl->hint_byte = fs_info->treelog_bg;
|
||||
spin_unlock(&fs_info->treelog_bg_lock);
|
||||
}
|
||||
if (ffe_ctl->for_data_reloc) {
|
||||
spin_lock(&fs_info->relocation_bg_lock);
|
||||
if (fs_info->data_reloc_bg)
|
||||
ffe_ctl->hint_byte = fs_info->data_reloc_bg;
|
||||
spin_unlock(&fs_info->relocation_bg_lock);
|
||||
}
|
||||
return 0;
|
||||
default:
|
||||
BUG();
|
||||
@@ -4117,65 +4185,62 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
|
||||
* |- If not found, re-iterate all block groups
|
||||
*/
|
||||
static noinline int find_free_extent(struct btrfs_root *root,
|
||||
u64 ram_bytes, u64 num_bytes, u64 empty_size,
|
||||
u64 hint_byte_orig, struct btrfs_key *ins,
|
||||
u64 flags, int delalloc)
|
||||
struct btrfs_key *ins,
|
||||
struct find_free_extent_ctl *ffe_ctl)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
int ret = 0;
|
||||
int cache_block_group_error = 0;
|
||||
struct btrfs_block_group *block_group = NULL;
|
||||
struct find_free_extent_ctl ffe_ctl = {0};
|
||||
struct btrfs_space_info *space_info;
|
||||
bool full_search = false;
|
||||
bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
|
||||
|
||||
WARN_ON(num_bytes < fs_info->sectorsize);
|
||||
|
||||
ffe_ctl.num_bytes = num_bytes;
|
||||
ffe_ctl.empty_size = empty_size;
|
||||
ffe_ctl.flags = flags;
|
||||
ffe_ctl.search_start = 0;
|
||||
ffe_ctl.delalloc = delalloc;
|
||||
ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
|
||||
ffe_ctl.have_caching_bg = false;
|
||||
ffe_ctl.orig_have_caching_bg = false;
|
||||
ffe_ctl.found_offset = 0;
|
||||
ffe_ctl.hint_byte = hint_byte_orig;
|
||||
ffe_ctl.for_treelog = for_treelog;
|
||||
ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
|
||||
WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
|
||||
|
||||
ffe_ctl->search_start = 0;
|
||||
/* For clustered allocation */
|
||||
ffe_ctl.retry_clustered = false;
|
||||
ffe_ctl.retry_unclustered = false;
|
||||
ffe_ctl.last_ptr = NULL;
|
||||
ffe_ctl.use_cluster = true;
|
||||
ffe_ctl->empty_cluster = 0;
|
||||
ffe_ctl->last_ptr = NULL;
|
||||
ffe_ctl->use_cluster = true;
|
||||
ffe_ctl->have_caching_bg = false;
|
||||
ffe_ctl->orig_have_caching_bg = false;
|
||||
ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
|
||||
ffe_ctl->loop = 0;
|
||||
/* For clustered allocation */
|
||||
ffe_ctl->retry_clustered = false;
|
||||
ffe_ctl->retry_unclustered = false;
|
||||
ffe_ctl->cached = 0;
|
||||
ffe_ctl->max_extent_size = 0;
|
||||
ffe_ctl->total_free_space = 0;
|
||||
ffe_ctl->found_offset = 0;
|
||||
ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
|
||||
|
||||
if (btrfs_is_zoned(fs_info))
|
||||
ffe_ctl.policy = BTRFS_EXTENT_ALLOC_ZONED;
|
||||
ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
|
||||
|
||||
ins->type = BTRFS_EXTENT_ITEM_KEY;
|
||||
ins->objectid = 0;
|
||||
ins->offset = 0;
|
||||
|
||||
trace_find_free_extent(root, num_bytes, empty_size, flags);
|
||||
trace_find_free_extent(root, ffe_ctl->num_bytes, ffe_ctl->empty_size,
|
||||
ffe_ctl->flags);
|
||||
|
||||
space_info = btrfs_find_space_info(fs_info, flags);
|
||||
space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
|
||||
if (!space_info) {
|
||||
btrfs_err(fs_info, "No space info for %llu", flags);
|
||||
btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins);
|
||||
ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ffe_ctl.search_start = max(ffe_ctl.search_start,
|
||||
first_logical_byte(fs_info, 0));
|
||||
ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte);
|
||||
if (ffe_ctl.search_start == ffe_ctl.hint_byte) {
|
||||
ffe_ctl->search_start = max(ffe_ctl->search_start,
|
||||
first_logical_byte(fs_info, 0));
|
||||
ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
|
||||
if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
|
||||
block_group = btrfs_lookup_block_group(fs_info,
|
||||
ffe_ctl.search_start);
|
||||
ffe_ctl->search_start);
|
||||
/*
|
||||
* we don't want to use the block group if it doesn't match our
|
||||
* allocation bits, or if it's not cached.
|
||||
@@ -4183,7 +4248,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
|
||||
* However if we are re-searching with an ideal block group
|
||||
* picked out then we don't care that the block group is cached.
|
||||
*/
|
||||
if (block_group && block_group_bits(block_group, flags) &&
|
||||
if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
|
||||
block_group->cached != BTRFS_CACHE_NO) {
|
||||
down_read(&space_info->groups_sem);
|
||||
if (list_empty(&block_group->list) ||
|
||||
@@ -4197,9 +4262,10 @@ static noinline int find_free_extent(struct btrfs_root *root,
|
||||
btrfs_put_block_group(block_group);
|
||||
up_read(&space_info->groups_sem);
|
||||
} else {
|
||||
ffe_ctl.index = btrfs_bg_flags_to_raid_index(
|
||||
block_group->flags);
|
||||
btrfs_lock_block_group(block_group, delalloc);
|
||||
ffe_ctl->index = btrfs_bg_flags_to_raid_index(
|
||||
block_group->flags);
|
||||
btrfs_lock_block_group(block_group,
|
||||
ffe_ctl->delalloc);
|
||||
goto have_block_group;
|
||||
}
|
||||
} else if (block_group) {
|
||||
@@ -4207,31 +4273,33 @@ static noinline int find_free_extent(struct btrfs_root *root,
|
||||
}
|
||||
}
|
||||
search:
|
||||
ffe_ctl.have_caching_bg = false;
|
||||
if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
|
||||
ffe_ctl.index == 0)
|
||||
ffe_ctl->have_caching_bg = false;
|
||||
if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
|
||||
ffe_ctl->index == 0)
|
||||
full_search = true;
|
||||
down_read(&space_info->groups_sem);
|
||||
list_for_each_entry(block_group,
|
||||
&space_info->block_groups[ffe_ctl.index], list) {
|
||||
&space_info->block_groups[ffe_ctl->index], list) {
|
||||
struct btrfs_block_group *bg_ret;
|
||||
|
||||
/* If the block group is read-only, we can skip it entirely. */
|
||||
if (unlikely(block_group->ro)) {
|
||||
if (for_treelog)
|
||||
if (ffe_ctl->for_treelog)
|
||||
btrfs_clear_treelog_bg(block_group);
|
||||
if (ffe_ctl->for_data_reloc)
|
||||
btrfs_clear_data_reloc_bg(block_group);
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_grab_block_group(block_group, delalloc);
|
||||
ffe_ctl.search_start = block_group->start;
|
||||
btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
|
||||
ffe_ctl->search_start = block_group->start;
|
||||
|
||||
/*
|
||||
* this can happen if we end up cycling through all the
|
||||
* raid types, but we want to make sure we only allocate
|
||||
* for the proper type.
|
||||
*/
|
||||
if (!block_group_bits(block_group, flags)) {
|
||||
if (!block_group_bits(block_group, ffe_ctl->flags)) {
|
||||
u64 extra = BTRFS_BLOCK_GROUP_DUP |
|
||||
BTRFS_BLOCK_GROUP_RAID1_MASK |
|
||||
BTRFS_BLOCK_GROUP_RAID56_MASK |
|
||||
@@ -4242,7 +4310,7 @@ search:
|
||||
* doesn't provide them, bail. This does allow us to
|
||||
* fill raid0 from raid1.
|
||||
*/
|
||||
if ((flags & extra) && !(block_group->flags & extra))
|
||||
if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
|
||||
goto loop;
|
||||
|
||||
/*
|
||||
@@ -4250,14 +4318,14 @@ search:
|
||||
* It's possible that we have MIXED_GROUP flag but no
|
||||
* block group is mixed. Just skip such block group.
|
||||
*/
|
||||
btrfs_release_block_group(block_group, delalloc);
|
||||
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
|
||||
continue;
|
||||
}
|
||||
|
||||
have_block_group:
|
||||
ffe_ctl.cached = btrfs_block_group_done(block_group);
|
||||
if (unlikely(!ffe_ctl.cached)) {
|
||||
ffe_ctl.have_caching_bg = true;
|
||||
ffe_ctl->cached = btrfs_block_group_done(block_group);
|
||||
if (unlikely(!ffe_ctl->cached)) {
|
||||
ffe_ctl->have_caching_bg = true;
|
||||
ret = btrfs_cache_block_group(block_group, 0);
|
||||
|
||||
/*
|
||||
@@ -4280,10 +4348,11 @@ have_block_group:
|
||||
goto loop;
|
||||
|
||||
bg_ret = NULL;
|
||||
ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
|
||||
ret = do_allocation(block_group, ffe_ctl, &bg_ret);
|
||||
if (ret == 0) {
|
||||
if (bg_ret && bg_ret != block_group) {
|
||||
btrfs_release_block_group(block_group, delalloc);
|
||||
btrfs_release_block_group(block_group,
|
||||
ffe_ctl->delalloc);
|
||||
block_group = bg_ret;
|
||||
}
|
||||
} else if (ret == -EAGAIN) {
|
||||
@@ -4293,46 +4362,49 @@ have_block_group:
|
||||
}
|
||||
|
||||
/* Checks */
|
||||
ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
|
||||
fs_info->stripesize);
|
||||
ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
|
||||
fs_info->stripesize);
|
||||
|
||||
/* move on to the next group */
|
||||
if (ffe_ctl.search_start + num_bytes >
|
||||
if (ffe_ctl->search_start + ffe_ctl->num_bytes >
|
||||
block_group->start + block_group->length) {
|
||||
btrfs_add_free_space_unused(block_group,
|
||||
ffe_ctl.found_offset, num_bytes);
|
||||
ffe_ctl->found_offset,
|
||||
ffe_ctl->num_bytes);
|
||||
goto loop;
|
||||
}
|
||||
|
||||
if (ffe_ctl.found_offset < ffe_ctl.search_start)
|
||||
if (ffe_ctl->found_offset < ffe_ctl->search_start)
|
||||
btrfs_add_free_space_unused(block_group,
|
||||
ffe_ctl.found_offset,
|
||||
ffe_ctl.search_start - ffe_ctl.found_offset);
|
||||
ffe_ctl->found_offset,
|
||||
ffe_ctl->search_start - ffe_ctl->found_offset);
|
||||
|
||||
ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
|
||||
num_bytes, delalloc);
|
||||
ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
|
||||
ffe_ctl->num_bytes,
|
||||
ffe_ctl->delalloc);
|
||||
if (ret == -EAGAIN) {
|
||||
btrfs_add_free_space_unused(block_group,
|
||||
ffe_ctl.found_offset, num_bytes);
|
||||
ffe_ctl->found_offset,
|
||||
ffe_ctl->num_bytes);
|
||||
goto loop;
|
||||
}
|
||||
btrfs_inc_block_group_reservations(block_group);
|
||||
|
||||
/* we are all good, lets return */
|
||||
ins->objectid = ffe_ctl.search_start;
|
||||
ins->offset = num_bytes;
|
||||
ins->objectid = ffe_ctl->search_start;
|
||||
ins->offset = ffe_ctl->num_bytes;
|
||||
|
||||
trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
|
||||
num_bytes);
|
||||
btrfs_release_block_group(block_group, delalloc);
|
||||
trace_btrfs_reserve_extent(block_group, ffe_ctl->search_start,
|
||||
ffe_ctl->num_bytes);
|
||||
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
|
||||
break;
|
||||
loop:
|
||||
release_block_group(block_group, &ffe_ctl, delalloc);
|
||||
release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
|
||||
cond_resched();
|
||||
}
|
||||
up_read(&space_info->groups_sem);
|
||||
|
||||
ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search);
|
||||
ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
|
||||
if (ret > 0)
|
||||
goto search;
|
||||
|
||||
@@ -4341,12 +4413,12 @@ loop:
|
||||
* Use ffe_ctl->total_free_space as fallback if we can't find
|
||||
* any contiguous hole.
|
||||
*/
|
||||
if (!ffe_ctl.max_extent_size)
|
||||
ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
|
||||
if (!ffe_ctl->max_extent_size)
|
||||
ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
|
||||
spin_lock(&space_info->lock);
|
||||
space_info->max_extent_size = ffe_ctl.max_extent_size;
|
||||
space_info->max_extent_size = ffe_ctl->max_extent_size;
|
||||
spin_unlock(&space_info->lock);
|
||||
ins->offset = ffe_ctl.max_extent_size;
|
||||
ins->offset = ffe_ctl->max_extent_size;
|
||||
} else if (ret == -ENOSPC) {
|
||||
ret = cache_block_group_error;
|
||||
}
|
||||
@@ -4404,16 +4476,28 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
|
||||
struct btrfs_key *ins, int is_data, int delalloc)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct find_free_extent_ctl ffe_ctl = {};
|
||||
bool final_tried = num_bytes == min_alloc_size;
|
||||
u64 flags;
|
||||
int ret;
|
||||
bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
|
||||
bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
|
||||
|
||||
flags = get_alloc_profile_by_root(root, is_data);
|
||||
again:
|
||||
WARN_ON(num_bytes < fs_info->sectorsize);
|
||||
ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
|
||||
hint_byte, ins, flags, delalloc);
|
||||
|
||||
ffe_ctl.ram_bytes = ram_bytes;
|
||||
ffe_ctl.num_bytes = num_bytes;
|
||||
ffe_ctl.min_alloc_size = min_alloc_size;
|
||||
ffe_ctl.empty_size = empty_size;
|
||||
ffe_ctl.flags = flags;
|
||||
ffe_ctl.delalloc = delalloc;
|
||||
ffe_ctl.hint_byte = hint_byte;
|
||||
ffe_ctl.for_treelog = for_treelog;
|
||||
ffe_ctl.for_data_reloc = for_data_reloc;
|
||||
|
||||
ret = find_free_extent(root, ins, &ffe_ctl);
|
||||
if (!ret && !is_data) {
|
||||
btrfs_dec_block_group_reservations(fs_info, ins->objectid);
|
||||
} else if (ret == -ENOSPC) {
|
||||
@@ -4431,8 +4515,8 @@ again:
|
||||
|
||||
sinfo = btrfs_find_space_info(fs_info, flags);
|
||||
btrfs_err(fs_info,
|
||||
"allocation failed flags %llu, wanted %llu tree-log %d",
|
||||
flags, num_bytes, for_treelog);
|
||||
"allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
|
||||
flags, num_bytes, for_treelog, for_data_reloc);
|
||||
if (sinfo)
|
||||
btrfs_dump_space_info(fs_info, sinfo,
|
||||
num_bytes, 1);
|
||||
@@ -4543,7 +4627,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
|
||||
ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true);
|
||||
if (ret) { /* -ENOENT, logic error */
|
||||
btrfs_err(fs_info, "update block group failed for %llu %llu",
|
||||
ins->objectid, ins->offset);
|
||||
@@ -4632,7 +4716,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
|
||||
ret = btrfs_update_block_group(trans, extent_key.objectid,
|
||||
fs_info->nodesize, 1);
|
||||
fs_info->nodesize, true);
|
||||
if (ret) { /* -ENOENT, logic error */
|
||||
btrfs_err(fs_info, "update block group failed for %llu %llu",
|
||||
extent_key.objectid, extent_key.offset);
|
||||
@@ -4655,7 +4739,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
|
||||
btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
|
||||
ins->objectid, ins->offset, 0);
|
||||
btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
|
||||
btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
|
||||
offset, 0, false);
|
||||
btrfs_ref_tree_mod(root->fs_info, &generic_ref);
|
||||
|
||||
return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
|
||||
@@ -4847,8 +4932,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
|
||||
|
||||
btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
|
||||
ins.objectid, ins.offset, parent);
|
||||
generic_ref.real_root = root->root_key.objectid;
|
||||
btrfs_init_tree_ref(&generic_ref, level, root_objectid);
|
||||
btrfs_init_tree_ref(&generic_ref, level, root_objectid,
|
||||
root->root_key.objectid, false);
|
||||
btrfs_ref_tree_mod(fs_info, &generic_ref);
|
||||
ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
|
||||
if (ret)
|
||||
@@ -5265,7 +5350,8 @@ skip:
|
||||
|
||||
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
|
||||
fs_info->nodesize, parent);
|
||||
btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
|
||||
btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
|
||||
0, false);
|
||||
ret = btrfs_free_extent(trans, &ref);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
@@ -5750,13 +5836,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
btrfs_assert_tree_locked(parent);
|
||||
btrfs_assert_tree_write_locked(parent);
|
||||
parent_level = btrfs_header_level(parent);
|
||||
atomic_inc(&parent->refs);
|
||||
path->nodes[parent_level] = parent;
|
||||
path->slots[parent_level] = btrfs_header_nritems(parent);
|
||||
|
||||
btrfs_assert_tree_locked(node);
|
||||
btrfs_assert_tree_write_locked(node);
|
||||
level = btrfs_header_level(node);
|
||||
path->nodes[level] = node;
|
||||
path->slots[level] = 0;
|
||||
|
||||
@@ -241,7 +241,7 @@ int __init extent_io_init(void)
|
||||
return -ENOMEM;
|
||||
|
||||
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
|
||||
offsetof(struct btrfs_io_bio, bio),
|
||||
offsetof(struct btrfs_bio, bio),
|
||||
BIOSET_NEED_BVECS))
|
||||
goto free_buffer_cache;
|
||||
|
||||
@@ -1975,10 +1975,18 @@ static noinline int lock_delalloc_pages(struct inode *inode,
|
||||
|
||||
/*
|
||||
* Find and lock a contiguous range of bytes in the file marked as delalloc, no
|
||||
* more than @max_bytes. @Start and @end are used to return the range,
|
||||
* more than @max_bytes.
|
||||
*
|
||||
* Return: true if we find something
|
||||
* false if nothing was in the tree
|
||||
* @start: The original start bytenr to search.
|
||||
* Will store the extent range start bytenr.
|
||||
* @end: The original end bytenr of the search range
|
||||
* Will store the extent range end bytenr.
|
||||
*
|
||||
* Return true if we find a delalloc range which starts inside the original
|
||||
* range, and @start/@end will store the delalloc range start/end.
|
||||
*
|
||||
* Return false if we can't find any delalloc range which starts inside the
|
||||
* original range, and @start/@end will be the non-delalloc range start/end.
|
||||
*/
|
||||
EXPORT_FOR_TESTS
|
||||
noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
|
||||
@@ -1986,6 +1994,8 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
|
||||
u64 *end)
|
||||
{
|
||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||
const u64 orig_start = *start;
|
||||
const u64 orig_end = *end;
|
||||
u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
|
||||
u64 delalloc_start;
|
||||
u64 delalloc_end;
|
||||
@@ -1994,15 +2004,23 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
|
||||
int ret;
|
||||
int loops = 0;
|
||||
|
||||
/* Caller should pass a valid @end to indicate the search range end */
|
||||
ASSERT(orig_end > orig_start);
|
||||
|
||||
/* The range should at least cover part of the page */
|
||||
ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
|
||||
orig_end <= page_offset(locked_page)));
|
||||
again:
|
||||
/* step one, find a bunch of delalloc bytes starting at start */
|
||||
delalloc_start = *start;
|
||||
delalloc_end = 0;
|
||||
found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
|
||||
max_bytes, &cached_state);
|
||||
if (!found || delalloc_end <= *start) {
|
||||
if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
|
||||
*start = delalloc_start;
|
||||
*end = delalloc_end;
|
||||
|
||||
/* @delalloc_end can be -1, never go beyond @orig_end */
|
||||
*end = min(delalloc_end, orig_end);
|
||||
free_extent_state(cached_state);
|
||||
return false;
|
||||
}
|
||||
@@ -2282,15 +2300,15 @@ int free_io_failure(struct extent_io_tree *failure_tree,
|
||||
* currently, there can be no more than two copies of every data bit. thus,
|
||||
* exactly one rewrite is required.
|
||||
*/
|
||||
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
u64 length, u64 logical, struct page *page,
|
||||
unsigned int pg_offset, int mirror_num)
|
||||
static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
u64 length, u64 logical, struct page *page,
|
||||
unsigned int pg_offset, int mirror_num)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct btrfs_device *dev;
|
||||
u64 map_length = 0;
|
||||
u64 sector;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
int ret;
|
||||
|
||||
ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
|
||||
@@ -2299,12 +2317,12 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
if (btrfs_is_zoned(fs_info))
|
||||
return btrfs_repair_one_zone(fs_info, logical);
|
||||
|
||||
bio = btrfs_io_bio_alloc(1);
|
||||
bio = btrfs_bio_alloc(1);
|
||||
bio->bi_iter.bi_size = 0;
|
||||
map_length = length;
|
||||
|
||||
/*
|
||||
* Avoid races with device replace and make sure our bbio has devices
|
||||
* Avoid races with device replace and make sure our bioc has devices
|
||||
* associated to its stripes that don't go away while we are doing the
|
||||
* read repair operation.
|
||||
*/
|
||||
@@ -2317,28 +2335,28 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
* stripe's dev and sector.
|
||||
*/
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
|
||||
&map_length, &bbio, 0);
|
||||
&map_length, &bioc, 0);
|
||||
if (ret) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
bio_put(bio);
|
||||
return -EIO;
|
||||
}
|
||||
ASSERT(bbio->mirror_num == 1);
|
||||
ASSERT(bioc->mirror_num == 1);
|
||||
} else {
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
|
||||
&map_length, &bbio, mirror_num);
|
||||
&map_length, &bioc, mirror_num);
|
||||
if (ret) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
bio_put(bio);
|
||||
return -EIO;
|
||||
}
|
||||
BUG_ON(mirror_num != bbio->mirror_num);
|
||||
BUG_ON(mirror_num != bioc->mirror_num);
|
||||
}
|
||||
|
||||
sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
|
||||
sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
dev = bbio->stripes[bbio->mirror_num - 1].dev;
|
||||
btrfs_put_bbio(bbio);
|
||||
dev = bioc->stripes[bioc->mirror_num - 1].dev;
|
||||
btrfs_put_bioc(bioc);
|
||||
if (!dev || !dev->bdev ||
|
||||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
@@ -2618,10 +2636,10 @@ int btrfs_repair_one_sector(struct inode *inode,
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
|
||||
struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
|
||||
struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
|
||||
const int icsum = bio_offset >> fs_info->sectorsize_bits;
|
||||
struct bio *repair_bio;
|
||||
struct btrfs_io_bio *repair_io_bio;
|
||||
struct btrfs_bio *repair_bbio;
|
||||
blk_status_t status;
|
||||
|
||||
btrfs_debug(fs_info,
|
||||
@@ -2639,24 +2657,23 @@ int btrfs_repair_one_sector(struct inode *inode,
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
repair_bio = btrfs_io_bio_alloc(1);
|
||||
repair_io_bio = btrfs_io_bio(repair_bio);
|
||||
repair_bio = btrfs_bio_alloc(1);
|
||||
repair_bbio = btrfs_bio(repair_bio);
|
||||
repair_bio->bi_opf = REQ_OP_READ;
|
||||
repair_bio->bi_end_io = failed_bio->bi_end_io;
|
||||
repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
|
||||
repair_bio->bi_private = failed_bio->bi_private;
|
||||
|
||||
if (failed_io_bio->csum) {
|
||||
if (failed_bbio->csum) {
|
||||
const u32 csum_size = fs_info->csum_size;
|
||||
|
||||
repair_io_bio->csum = repair_io_bio->csum_inline;
|
||||
memcpy(repair_io_bio->csum,
|
||||
failed_io_bio->csum + csum_size * icsum, csum_size);
|
||||
repair_bbio->csum = repair_bbio->csum_inline;
|
||||
memcpy(repair_bbio->csum,
|
||||
failed_bbio->csum + csum_size * icsum, csum_size);
|
||||
}
|
||||
|
||||
bio_add_page(repair_bio, page, failrec->len, pgoff);
|
||||
repair_io_bio->logical = failrec->start;
|
||||
repair_io_bio->iter = repair_bio->bi_iter;
|
||||
repair_bbio->iter = repair_bio->bi_iter;
|
||||
|
||||
btrfs_debug(btrfs_sb(inode->i_sb),
|
||||
"repair read error: submitting new read to mirror %d",
|
||||
@@ -2976,7 +2993,7 @@ static struct extent_buffer *find_extent_buffer_readpage(
|
||||
static void end_bio_extent_readpage(struct bio *bio)
|
||||
{
|
||||
struct bio_vec *bvec;
|
||||
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
struct extent_io_tree *tree, *failure_tree;
|
||||
struct processed_extent processed = { 0 };
|
||||
/*
|
||||
@@ -3003,7 +3020,7 @@ static void end_bio_extent_readpage(struct bio *bio)
|
||||
btrfs_debug(fs_info,
|
||||
"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
|
||||
bio->bi_iter.bi_sector, bio->bi_status,
|
||||
io_bio->mirror_num);
|
||||
bbio->mirror_num);
|
||||
tree = &BTRFS_I(inode)->io_tree;
|
||||
failure_tree = &BTRFS_I(inode)->io_failure_tree;
|
||||
|
||||
@@ -3028,14 +3045,14 @@ static void end_bio_extent_readpage(struct bio *bio)
|
||||
end = start + bvec->bv_len - 1;
|
||||
len = bvec->bv_len;
|
||||
|
||||
mirror = io_bio->mirror_num;
|
||||
mirror = bbio->mirror_num;
|
||||
if (likely(uptodate)) {
|
||||
if (is_data_inode(inode)) {
|
||||
error_bitmap = btrfs_verify_data_csum(io_bio,
|
||||
error_bitmap = btrfs_verify_data_csum(bbio,
|
||||
bio_offset, page, start, end);
|
||||
ret = error_bitmap;
|
||||
} else {
|
||||
ret = btrfs_validate_metadata_buffer(io_bio,
|
||||
ret = btrfs_validate_metadata_buffer(bbio,
|
||||
page, start, end, mirror);
|
||||
}
|
||||
if (ret)
|
||||
@@ -3106,7 +3123,7 @@ readpage_ok:
|
||||
}
|
||||
/* Release the last extent */
|
||||
endio_readpage_release_extent(&processed, NULL, 0, 0, false);
|
||||
btrfs_io_bio_free_csum(io_bio);
|
||||
btrfs_bio_free_csum(bbio);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
@@ -3115,53 +3132,43 @@ readpage_ok:
|
||||
* new bio by bio_alloc_bioset as it does not initialize the bytes outside of
|
||||
* 'bio' because use of __GFP_ZERO is not supported.
|
||||
*/
|
||||
static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
|
||||
static inline void btrfs_bio_init(struct btrfs_bio *bbio)
|
||||
{
|
||||
memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
|
||||
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
|
||||
}
|
||||
|
||||
/*
|
||||
* The following helpers allocate a bio. As it's backed by a bioset, it'll
|
||||
* never fail. We're returning a bio right now but you can call btrfs_io_bio
|
||||
* for the appropriate container_of magic
|
||||
* Allocate a btrfs_bio, with @nr_iovecs as maximum number of iovecs.
|
||||
*
|
||||
* The bio allocation is backed by bioset and does not fail.
|
||||
*/
|
||||
struct bio *btrfs_bio_alloc(u64 first_byte)
|
||||
struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset);
|
||||
bio->bi_iter.bi_sector = first_byte >> 9;
|
||||
btrfs_io_bio_init(btrfs_io_bio(bio));
|
||||
ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
|
||||
bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
|
||||
btrfs_bio_init(btrfs_bio(bio));
|
||||
return bio;
|
||||
}
|
||||
|
||||
struct bio *btrfs_bio_clone(struct bio *bio)
|
||||
{
|
||||
struct btrfs_io_bio *btrfs_bio;
|
||||
struct btrfs_bio *bbio;
|
||||
struct bio *new;
|
||||
|
||||
/* Bio allocation backed by a bioset does not fail */
|
||||
new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
|
||||
btrfs_bio = btrfs_io_bio(new);
|
||||
btrfs_io_bio_init(btrfs_bio);
|
||||
btrfs_bio->iter = bio->bi_iter;
|
||||
bbio = btrfs_bio(new);
|
||||
btrfs_bio_init(bbio);
|
||||
bbio->iter = bio->bi_iter;
|
||||
return new;
|
||||
}
|
||||
|
||||
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
/* Bio allocation backed by a bioset does not fail */
|
||||
bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
|
||||
btrfs_io_bio_init(btrfs_io_bio(bio));
|
||||
return bio;
|
||||
}
|
||||
|
||||
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
|
||||
{
|
||||
struct bio *bio;
|
||||
struct btrfs_io_bio *btrfs_bio;
|
||||
struct btrfs_bio *bbio;
|
||||
|
||||
ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
|
||||
|
||||
@@ -3169,11 +3176,11 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
|
||||
bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
|
||||
ASSERT(bio);
|
||||
|
||||
btrfs_bio = btrfs_io_bio(bio);
|
||||
btrfs_io_bio_init(btrfs_bio);
|
||||
bbio = btrfs_bio(bio);
|
||||
btrfs_bio_init(bbio);
|
||||
|
||||
bio_trim(bio, offset >> 9, size >> 9);
|
||||
btrfs_bio->iter = bio->bi_iter;
|
||||
bbio->iter = bio->bi_iter;
|
||||
return bio;
|
||||
}
|
||||
|
||||
@@ -3307,14 +3314,15 @@ static int alloc_new_bio(struct btrfs_inode *inode,
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS);
|
||||
/*
|
||||
* For compressed page range, its disk_bytenr is always @disk_bytenr
|
||||
* passed in, no matter if we have added any range into previous bio.
|
||||
*/
|
||||
if (bio_flags & EXTENT_BIO_COMPRESSED)
|
||||
bio = btrfs_bio_alloc(disk_bytenr);
|
||||
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
|
||||
else
|
||||
bio = btrfs_bio_alloc(disk_bytenr + offset);
|
||||
bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
|
||||
bio_ctrl->bio = bio;
|
||||
bio_ctrl->bio_flags = bio_flags;
|
||||
bio->bi_end_io = end_io_func;
|
||||
@@ -3327,7 +3335,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
|
||||
if (wbc) {
|
||||
struct block_device *bdev;
|
||||
|
||||
bdev = fs_info->fs_devices->latest_bdev;
|
||||
bdev = fs_info->fs_devices->latest_dev->bdev;
|
||||
bio_set_dev(bio, bdev);
|
||||
wbc_init_bio(wbc, bio);
|
||||
}
|
||||
@@ -3341,7 +3349,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
|
||||
goto error;
|
||||
}
|
||||
|
||||
btrfs_io_bio(bio)->device = device;
|
||||
btrfs_bio(bio)->device = device;
|
||||
}
|
||||
return 0;
|
||||
error:
|
||||
@@ -3599,6 +3607,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
|
||||
bool force_bio_submit = false;
|
||||
u64 disk_bytenr;
|
||||
|
||||
ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
|
||||
if (cur >= last_byte) {
|
||||
struct extent_state *cached = NULL;
|
||||
|
||||
@@ -3777,17 +3786,18 @@ static void update_nr_written(struct writeback_control *wbc,
|
||||
*/
|
||||
static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
||||
struct page *page, struct writeback_control *wbc,
|
||||
u64 delalloc_start, unsigned long *nr_written)
|
||||
unsigned long *nr_written)
|
||||
{
|
||||
u64 page_end = delalloc_start + PAGE_SIZE - 1;
|
||||
bool found;
|
||||
const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
|
||||
u64 delalloc_start = page_offset(page);
|
||||
u64 delalloc_to_write = 0;
|
||||
u64 delalloc_end = 0;
|
||||
int ret;
|
||||
int page_started = 0;
|
||||
|
||||
while (delalloc_start < page_end) {
|
||||
u64 delalloc_end = page_end;
|
||||
bool found;
|
||||
|
||||
while (delalloc_end < page_end) {
|
||||
found = find_lock_delalloc_range(&inode->vfs_inode, page,
|
||||
&delalloc_start,
|
||||
&delalloc_end);
|
||||
@@ -3854,12 +3864,11 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 *start, u64 *end)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
struct btrfs_subpage_info *spi = fs_info->subpage_info;
|
||||
u64 orig_start = *start;
|
||||
/* Declare as unsigned long so we can use bitmap ops */
|
||||
unsigned long dirty_bitmap;
|
||||
unsigned long flags;
|
||||
int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
|
||||
int range_start_bit = nbits;
|
||||
int range_start_bit;
|
||||
int range_end_bit;
|
||||
|
||||
/*
|
||||
@@ -3872,13 +3881,18 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
|
||||
return;
|
||||
}
|
||||
|
||||
range_start_bit = spi->dirty_offset +
|
||||
(offset_in_page(orig_start) >> fs_info->sectorsize_bits);
|
||||
|
||||
/* We should have the page locked, but just in case */
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
dirty_bitmap = subpage->dirty_bitmap;
|
||||
bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
|
||||
spi->dirty_offset + spi->bitmap_nr_bits);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
|
||||
bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
|
||||
BTRFS_SUBPAGE_BITMAP_SIZE);
|
||||
range_start_bit -= spi->dirty_offset;
|
||||
range_end_bit -= spi->dirty_offset;
|
||||
|
||||
*start = page_offset(page) + range_start_bit * fs_info->sectorsize;
|
||||
*end = page_offset(page) + range_end_bit * fs_info->sectorsize;
|
||||
}
|
||||
@@ -4054,8 +4068,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
|
||||
struct extent_page_data *epd)
|
||||
{
|
||||
struct inode *inode = page->mapping->host;
|
||||
u64 start = page_offset(page);
|
||||
u64 page_end = start + PAGE_SIZE - 1;
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
const u64 page_start = page_offset(page);
|
||||
const u64 page_end = page_start + PAGE_SIZE - 1;
|
||||
int ret;
|
||||
int nr = 0;
|
||||
size_t pg_offset;
|
||||
@@ -4090,8 +4105,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
|
||||
}
|
||||
|
||||
if (!epd->extent_locked) {
|
||||
ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
|
||||
&nr_written);
|
||||
ret = writepage_delalloc(BTRFS_I(inode), page, wbc, &nr_written);
|
||||
if (ret == 1)
|
||||
return 0;
|
||||
if (ret)
|
||||
@@ -4141,8 +4155,20 @@ done:
|
||||
* capable of that.
|
||||
*/
|
||||
if (PageError(page))
|
||||
end_extent_writepage(page, ret, start, page_end);
|
||||
unlock_page(page);
|
||||
end_extent_writepage(page, ret, page_start, page_end);
|
||||
if (epd->extent_locked) {
|
||||
/*
|
||||
* If epd->extent_locked, it's from extent_write_locked_range(),
|
||||
* the page can either be locked by lock_page() or
|
||||
* process_one_page().
|
||||
* Let btrfs_page_unlock_writer() handle both cases.
|
||||
*/
|
||||
ASSERT(wbc);
|
||||
btrfs_page_unlock_writer(fs_info, page, wbc->range_start,
|
||||
wbc->range_end + 1 - wbc->range_start);
|
||||
} else {
|
||||
unlock_page(page);
|
||||
}
|
||||
ASSERT(ret <= 0);
|
||||
return ret;
|
||||
}
|
||||
@@ -4155,6 +4181,9 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
|
||||
|
||||
static void end_extent_buffer_writeback(struct extent_buffer *eb)
|
||||
{
|
||||
if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
|
||||
btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
|
||||
|
||||
clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
|
||||
smp_mb__after_atomic();
|
||||
wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
|
||||
@@ -4602,12 +4631,11 @@ static int submit_eb_subpage(struct page *page,
|
||||
int submitted = 0;
|
||||
u64 page_start = page_offset(page);
|
||||
int bit_start = 0;
|
||||
const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
|
||||
int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
|
||||
int ret;
|
||||
|
||||
/* Lock and write each dirty extent buffers in the range */
|
||||
while (bit_start < nbits) {
|
||||
while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
struct extent_buffer *eb;
|
||||
unsigned long flags;
|
||||
@@ -4623,7 +4651,8 @@ static int submit_eb_subpage(struct page *page,
|
||||
break;
|
||||
}
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
if (!((1 << bit_start) & subpage->dirty_bitmap)) {
|
||||
if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
|
||||
subpage->bitmaps)) {
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
spin_unlock(&page->mapping->private_lock);
|
||||
bit_start++;
|
||||
@@ -4756,8 +4785,13 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
|
||||
free_extent_buffer(eb);
|
||||
return ret;
|
||||
}
|
||||
if (cache)
|
||||
if (cache) {
|
||||
/* Implies write in zoned mode */
|
||||
btrfs_put_block_group(cache);
|
||||
/* Mark the last eb in a block group */
|
||||
if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
|
||||
set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
|
||||
}
|
||||
ret = write_one_eb(eb, wbc, epd);
|
||||
free_extent_buffer(eb);
|
||||
if (ret < 0)
|
||||
@@ -4873,7 +4907,7 @@ retry:
|
||||
* extent io tree. Thus we don't want to submit such wild eb
|
||||
* if the fs already has error.
|
||||
*/
|
||||
if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
|
||||
if (!BTRFS_FS_ERROR(fs_info)) {
|
||||
ret = flush_write_bio(&epd);
|
||||
} else {
|
||||
ret = -EROFS;
|
||||
@@ -5069,23 +5103,28 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
|
||||
int mode)
|
||||
/*
|
||||
* Submit the pages in the range to bio for call sites whose delalloc range has
|
||||
* already been run (aka, ordered extent inserted) and all pages are still
|
||||
* locked.
|
||||
*/
|
||||
int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
|
||||
{
|
||||
bool found_error = false;
|
||||
int first_error = 0;
|
||||
int ret = 0;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct page *page;
|
||||
unsigned long nr_pages = (end - start + PAGE_SIZE) >>
|
||||
PAGE_SHIFT;
|
||||
|
||||
u64 cur = start;
|
||||
unsigned long nr_pages;
|
||||
const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
|
||||
struct extent_page_data epd = {
|
||||
.bio_ctrl = { 0 },
|
||||
.extent_locked = 1,
|
||||
.sync_io = mode == WB_SYNC_ALL,
|
||||
.sync_io = 1,
|
||||
};
|
||||
struct writeback_control wbc_writepages = {
|
||||
.sync_mode = mode,
|
||||
.nr_to_write = nr_pages * 2,
|
||||
.sync_mode = WB_SYNC_ALL,
|
||||
.range_start = start,
|
||||
.range_end = end + 1,
|
||||
/* We're called from an async helper function */
|
||||
@@ -5093,33 +5132,51 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
|
||||
.no_cgroup_owner = 1,
|
||||
};
|
||||
|
||||
ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
|
||||
nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
|
||||
PAGE_SHIFT;
|
||||
wbc_writepages.nr_to_write = nr_pages * 2;
|
||||
|
||||
wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
|
||||
while (start <= end) {
|
||||
page = find_get_page(mapping, start >> PAGE_SHIFT);
|
||||
if (clear_page_dirty_for_io(page))
|
||||
ret = __extent_writepage(page, &wbc_writepages, &epd);
|
||||
else {
|
||||
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
|
||||
page, start, start + PAGE_SIZE - 1, true);
|
||||
unlock_page(page);
|
||||
while (cur <= end) {
|
||||
u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
|
||||
|
||||
page = find_get_page(mapping, cur >> PAGE_SHIFT);
|
||||
/*
|
||||
* All pages in the range are locked since
|
||||
* btrfs_run_delalloc_range(), thus there is no way to clear
|
||||
* the page dirty flag.
|
||||
*/
|
||||
ASSERT(PageLocked(page));
|
||||
ASSERT(PageDirty(page));
|
||||
clear_page_dirty_for_io(page);
|
||||
ret = __extent_writepage(page, &wbc_writepages, &epd);
|
||||
ASSERT(ret <= 0);
|
||||
if (ret < 0) {
|
||||
found_error = true;
|
||||
first_error = ret;
|
||||
}
|
||||
put_page(page);
|
||||
start += PAGE_SIZE;
|
||||
cur = cur_end + 1;
|
||||
}
|
||||
|
||||
ASSERT(ret <= 0);
|
||||
if (ret == 0)
|
||||
if (!found_error)
|
||||
ret = flush_write_bio(&epd);
|
||||
else
|
||||
end_write_bio(&epd, ret);
|
||||
|
||||
wbc_detach_inode(&wbc_writepages);
|
||||
if (found_error)
|
||||
return first_error;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int extent_writepages(struct address_space *mapping,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
const bool data_reloc = btrfs_is_data_reloc_root(BTRFS_I(inode)->root);
|
||||
const bool zoned = btrfs_is_zoned(BTRFS_I(inode)->root->fs_info);
|
||||
int ret = 0;
|
||||
struct extent_page_data epd = {
|
||||
.bio_ctrl = { 0 },
|
||||
@@ -5127,7 +5184,15 @@ int extent_writepages(struct address_space *mapping,
|
||||
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
|
||||
};
|
||||
|
||||
/*
|
||||
* Allow only a single thread to do the reloc work in zoned mode to
|
||||
* protect the write pointer updates.
|
||||
*/
|
||||
if (data_reloc && zoned)
|
||||
btrfs_inode_lock(inode, 0);
|
||||
ret = extent_write_cache_pages(mapping, wbc, &epd);
|
||||
if (data_reloc && zoned)
|
||||
btrfs_inode_unlock(inode, 0);
|
||||
ASSERT(ret <= 0);
|
||||
if (ret < 0) {
|
||||
end_write_bio(&epd, ret);
|
||||
@@ -6137,13 +6202,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
* page, but it may change in the future for 16K page size
|
||||
* support, so we still preallocate the memory in the loop.
|
||||
*/
|
||||
ret = btrfs_alloc_subpage(fs_info, &prealloc,
|
||||
BTRFS_SUBPAGE_METADATA);
|
||||
if (ret < 0) {
|
||||
unlock_page(p);
|
||||
put_page(p);
|
||||
exists = ERR_PTR(ret);
|
||||
goto free_eb;
|
||||
if (fs_info->sectorsize < PAGE_SIZE) {
|
||||
prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
|
||||
if (IS_ERR(prealloc)) {
|
||||
ret = PTR_ERR(prealloc);
|
||||
unlock_page(p);
|
||||
put_page(p);
|
||||
exists = ERR_PTR(ret);
|
||||
goto free_eb;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&mapping->private_lock);
|
||||
@@ -7167,32 +7234,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
|
||||
}
|
||||
}
|
||||
|
||||
#define GANG_LOOKUP_SIZE 16
|
||||
static struct extent_buffer *get_next_extent_buffer(
|
||||
struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
|
||||
{
|
||||
struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
|
||||
struct extent_buffer *gang[GANG_LOOKUP_SIZE];
|
||||
struct extent_buffer *found = NULL;
|
||||
u64 page_start = page_offset(page);
|
||||
int ret;
|
||||
int i;
|
||||
u64 cur = page_start;
|
||||
|
||||
ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
|
||||
ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
|
||||
lockdep_assert_held(&fs_info->buffer_lock);
|
||||
|
||||
ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
|
||||
bytenr >> fs_info->sectorsize_bits,
|
||||
PAGE_SIZE / fs_info->nodesize);
|
||||
for (i = 0; i < ret; i++) {
|
||||
/* Already beyond page end */
|
||||
if (gang[i]->start >= page_start + PAGE_SIZE)
|
||||
break;
|
||||
/* Found one */
|
||||
if (gang[i]->start >= bytenr) {
|
||||
found = gang[i];
|
||||
break;
|
||||
while (cur < page_start + PAGE_SIZE) {
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
|
||||
(void **)gang, cur >> fs_info->sectorsize_bits,
|
||||
min_t(unsigned int, GANG_LOOKUP_SIZE,
|
||||
PAGE_SIZE / fs_info->nodesize));
|
||||
if (ret == 0)
|
||||
goto out;
|
||||
for (i = 0; i < ret; i++) {
|
||||
/* Already beyond page end */
|
||||
if (gang[i]->start >= page_start + PAGE_SIZE)
|
||||
goto out;
|
||||
/* Found one */
|
||||
if (gang[i]->start >= bytenr) {
|
||||
found = gang[i];
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
cur = gang[ret - 1]->start + gang[ret - 1]->len;
|
||||
}
|
||||
out:
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ enum {
|
||||
/* write IO error */
|
||||
EXTENT_BUFFER_WRITE_ERR,
|
||||
EXTENT_BUFFER_NO_CHECK,
|
||||
EXTENT_BUFFER_ZONE_FINISH,
|
||||
};
|
||||
|
||||
/* these are flags for __process_pages_contig */
|
||||
@@ -183,8 +184,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
|
||||
struct btrfs_bio_ctrl *bio_ctrl,
|
||||
unsigned int read_flags, u64 *prev_em_start);
|
||||
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
|
||||
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
|
||||
int mode);
|
||||
int extent_write_locked_range(struct inode *inode, u64 start, u64 end);
|
||||
int extent_writepages(struct address_space *mapping,
|
||||
struct writeback_control *wbc);
|
||||
int btree_write_cache_pages(struct address_space *mapping,
|
||||
@@ -277,14 +277,10 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
|
||||
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct page *locked_page,
|
||||
u32 bits_to_clear, unsigned long page_ops);
|
||||
struct bio *btrfs_bio_alloc(u64 first_byte);
|
||||
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
|
||||
struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
|
||||
struct bio *btrfs_bio_clone(struct bio *bio);
|
||||
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
|
||||
|
||||
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
u64 length, u64 logical, struct page *page,
|
||||
unsigned int pg_offset, int mirror_num);
|
||||
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
|
||||
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
|
||||
|
||||
|
||||
@@ -360,7 +360,7 @@ static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
struct btrfs_bio_stripe *stripe = &map->stripes[i];
|
||||
struct btrfs_io_stripe *stripe = &map->stripes[i];
|
||||
struct btrfs_device *device = stripe->dev;
|
||||
|
||||
set_extent_bits_nowait(&device->alloc_state, stripe->physical,
|
||||
@@ -375,7 +375,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
|
||||
int i;
|
||||
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
struct btrfs_bio_stripe *stripe = &map->stripes[i];
|
||||
struct btrfs_io_stripe *stripe = &map->stripes[i];
|
||||
struct btrfs_device *device = stripe->dev;
|
||||
|
||||
__clear_extent_bit(&device->alloc_state, stripe->physical,
|
||||
|
||||
@@ -358,7 +358,7 @@ static int search_file_offset_in_bio(struct bio *bio, struct inode *inode,
|
||||
* @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
|
||||
* checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
|
||||
* NULL, the checksum buffer is allocated and returned in
|
||||
* btrfs_io_bio(bio)->csum instead.
|
||||
* btrfs_bio(bio)->csum instead.
|
||||
*
|
||||
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
|
||||
*/
|
||||
@@ -397,19 +397,18 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
if (!dst) {
|
||||
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
|
||||
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
|
||||
btrfs_bio->csum = kmalloc_array(nblocks, csum_size,
|
||||
GFP_NOFS);
|
||||
if (!btrfs_bio->csum) {
|
||||
bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
|
||||
if (!bbio->csum) {
|
||||
btrfs_free_path(path);
|
||||
return BLK_STS_RESOURCE;
|
||||
}
|
||||
} else {
|
||||
btrfs_bio->csum = btrfs_bio->csum_inline;
|
||||
bbio->csum = bbio->csum_inline;
|
||||
}
|
||||
csum = btrfs_bio->csum;
|
||||
csum = bbio->csum;
|
||||
} else {
|
||||
csum = dst;
|
||||
}
|
||||
@@ -709,12 +708,12 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
|
||||
index = 0;
|
||||
}
|
||||
|
||||
data = kmap_atomic(bvec.bv_page);
|
||||
crypto_shash_digest(shash, data + bvec.bv_offset
|
||||
+ (i * fs_info->sectorsize),
|
||||
data = bvec_kmap_local(&bvec);
|
||||
crypto_shash_digest(shash,
|
||||
data + (i * fs_info->sectorsize),
|
||||
fs_info->sectorsize,
|
||||
sums->sums + index);
|
||||
kunmap_atomic(data);
|
||||
kunmap_local(data);
|
||||
index += fs_info->csum_size;
|
||||
offset += fs_info->sectorsize;
|
||||
this_sum_bytes += fs_info->sectorsize;
|
||||
|
||||
@@ -437,9 +437,15 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
|
||||
/*
|
||||
* unlocks pages after btrfs_file_write is done with them
|
||||
*/
|
||||
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
|
||||
static void btrfs_drop_pages(struct btrfs_fs_info *fs_info,
|
||||
struct page **pages, size_t num_pages,
|
||||
u64 pos, u64 copied)
|
||||
{
|
||||
size_t i;
|
||||
u64 block_start = round_down(pos, fs_info->sectorsize);
|
||||
u64 block_len = round_up(pos + copied, fs_info->sectorsize) - block_start;
|
||||
|
||||
ASSERT(block_len <= U32_MAX);
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
/* page checked is some magic around finding pages that
|
||||
* have been modified without going through btrfs_set_page_dirty
|
||||
@@ -447,7 +453,8 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
|
||||
* accessed as prepare_pages should have marked them accessed
|
||||
* in prepare_pages via find_or_create_page()
|
||||
*/
|
||||
ClearPageChecked(pages[i]);
|
||||
btrfs_page_clamp_clear_checked(fs_info, pages[i], block_start,
|
||||
block_len);
|
||||
unlock_page(pages[i]);
|
||||
put_page(pages[i]);
|
||||
}
|
||||
@@ -504,7 +511,7 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
|
||||
struct page *p = pages[i];
|
||||
|
||||
btrfs_page_clamp_set_uptodate(fs_info, p, start_pos, num_bytes);
|
||||
ClearPageChecked(p);
|
||||
btrfs_page_clamp_clear_checked(fs_info, p, start_pos, num_bytes);
|
||||
btrfs_page_clamp_set_dirty(fs_info, p, start_pos, num_bytes);
|
||||
}
|
||||
|
||||
@@ -869,7 +876,8 @@ next_slot:
|
||||
btrfs_init_data_ref(&ref,
|
||||
root->root_key.objectid,
|
||||
new_key.objectid,
|
||||
args->start - extent_offset);
|
||||
args->start - extent_offset,
|
||||
0, false);
|
||||
ret = btrfs_inc_extent_ref(trans, &ref);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
}
|
||||
@@ -955,7 +963,8 @@ delete_extent_item:
|
||||
btrfs_init_data_ref(&ref,
|
||||
root->root_key.objectid,
|
||||
key.objectid,
|
||||
key.offset - extent_offset);
|
||||
key.offset - extent_offset, 0,
|
||||
false);
|
||||
ret = btrfs_free_extent(trans, &ref);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
args->bytes_found += extent_end - key.offset;
|
||||
@@ -1020,8 +1029,7 @@ delete_extent_item:
|
||||
if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
|
||||
path->slots[0]++;
|
||||
}
|
||||
setup_items_for_insert(root, path, &key,
|
||||
&args->extent_item_size, 1);
|
||||
btrfs_setup_item_for_insert(root, path, &key, args->extent_item_size);
|
||||
args->extent_inserted = true;
|
||||
}
|
||||
|
||||
@@ -1232,7 +1240,7 @@ again:
|
||||
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
|
||||
num_bytes, 0);
|
||||
btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
|
||||
orig_offset);
|
||||
orig_offset, 0, false);
|
||||
ret = btrfs_inc_extent_ref(trans, &ref);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@@ -1257,7 +1265,8 @@ again:
|
||||
other_end = 0;
|
||||
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
|
||||
num_bytes, 0);
|
||||
btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset);
|
||||
btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset,
|
||||
0, false);
|
||||
if (extent_mergeable(leaf, path->slots[0] + 1,
|
||||
ino, bytenr, orig_offset,
|
||||
&other_start, &other_end)) {
|
||||
@@ -1844,7 +1853,7 @@ again:
|
||||
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
|
||||
if (ret) {
|
||||
btrfs_drop_pages(pages, num_pages);
|
||||
btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1852,7 +1861,7 @@ again:
|
||||
if (only_release_metadata)
|
||||
btrfs_check_nocow_unlock(BTRFS_I(inode));
|
||||
|
||||
btrfs_drop_pages(pages, num_pages);
|
||||
btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
|
||||
|
||||
cond_resched();
|
||||
|
||||
@@ -2012,7 +2021,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
|
||||
* have opened a file as writable, we have to stop this write operation
|
||||
* to ensure consistency.
|
||||
*/
|
||||
if (test_bit(BTRFS_FS_STATE_ERROR, &inode->root->fs_info->fs_state))
|
||||
if (BTRFS_FS_ERROR(inode->root->fs_info))
|
||||
return -EROFS;
|
||||
|
||||
if (!(iocb->ki_flags & IOCB_DIRECT) &&
|
||||
@@ -2620,7 +2629,7 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
|
||||
extent_info->disk_len, 0);
|
||||
ref_offset = extent_info->file_offset - extent_info->data_offset;
|
||||
btrfs_init_data_ref(&ref, root->root_key.objectid,
|
||||
btrfs_ino(inode), ref_offset);
|
||||
btrfs_ino(inode), ref_offset, 0, false);
|
||||
ret = btrfs_inc_extent_ref(trans, &ref);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "delalloc-space.h"
|
||||
#include "block-group.h"
|
||||
#include "discard.h"
|
||||
#include "subpage.h"
|
||||
|
||||
#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
|
||||
#define MAX_CACHE_BYTES_PER_GIG SZ_64K
|
||||
@@ -411,7 +412,10 @@ static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
|
||||
|
||||
for (i = 0; i < io_ctl->num_pages; i++) {
|
||||
if (io_ctl->pages[i]) {
|
||||
ClearPageChecked(io_ctl->pages[i]);
|
||||
btrfs_page_clear_checked(io_ctl->fs_info,
|
||||
io_ctl->pages[i],
|
||||
page_offset(io_ctl->pages[i]),
|
||||
PAGE_SIZE);
|
||||
unlock_page(io_ctl->pages[i]);
|
||||
put_page(io_ctl->pages[i]);
|
||||
}
|
||||
@@ -2539,10 +2543,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
|
||||
u64 offset = bytenr - block_group->start;
|
||||
u64 to_free, to_unusable;
|
||||
const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold);
|
||||
bool initial = (size == block_group->length);
|
||||
u64 reclaimable_unusable;
|
||||
|
||||
WARN_ON(!initial && offset + size > block_group->zone_capacity);
|
||||
|
||||
spin_lock(&ctl->tree_lock);
|
||||
if (!used)
|
||||
to_free = size;
|
||||
else if (initial)
|
||||
to_free = block_group->zone_capacity;
|
||||
else if (offset >= block_group->alloc_offset)
|
||||
to_free = size;
|
||||
else if (offset + size <= block_group->alloc_offset)
|
||||
@@ -2565,12 +2575,15 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
|
||||
spin_unlock(&block_group->lock);
|
||||
}
|
||||
|
||||
reclaimable_unusable = block_group->zone_unusable -
|
||||
(block_group->length - block_group->zone_capacity);
|
||||
/* All the region is now unusable. Mark it as unused and reclaim */
|
||||
if (block_group->zone_unusable == block_group->length) {
|
||||
btrfs_mark_bg_unused(block_group);
|
||||
} else if (bg_reclaim_threshold &&
|
||||
block_group->zone_unusable >=
|
||||
div_factor_fine(block_group->length, bg_reclaim_threshold)) {
|
||||
reclaimable_unusable >=
|
||||
div_factor_fine(block_group->zone_capacity,
|
||||
bg_reclaim_threshold)) {
|
||||
btrfs_mark_bg_to_reclaim(block_group);
|
||||
}
|
||||
|
||||
@@ -2754,8 +2767,9 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
|
||||
* out the free space after the allocation offset.
|
||||
*/
|
||||
if (btrfs_is_zoned(fs_info)) {
|
||||
btrfs_info(fs_info, "free space %llu",
|
||||
block_group->length - block_group->alloc_offset);
|
||||
btrfs_info(fs_info, "free space %llu active %d",
|
||||
block_group->zone_capacity - block_group->alloc_offset,
|
||||
block_group->zone_is_active);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
629
fs/btrfs/inode.c
629
fs/btrfs/inode.c
File diff suppressed because it is too large
Load Diff
1072
fs/btrfs/ioctl.c
1072
fs/btrfs/ioctl.c
File diff suppressed because it is too large
Load Diff
@@ -96,11 +96,12 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
|
||||
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) {
|
||||
lockdep_assert_held(&eb->lock);
|
||||
static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb)
|
||||
{
|
||||
lockdep_assert_held_write(&eb->lock);
|
||||
}
|
||||
#else
|
||||
static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
|
||||
static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb) { }
|
||||
#endif
|
||||
|
||||
void btrfs_unlock_up_safe(struct btrfs_path *path, int level);
|
||||
|
||||
285
fs/btrfs/lzo.c
285
fs/btrfs/lzo.c
@@ -32,19 +32,19 @@
|
||||
* payload.
|
||||
* One regular LZO compressed extent can have one or more segments.
|
||||
* For inlined LZO compressed extent, only one segment is allowed.
|
||||
* One segment represents at most one page of uncompressed data.
|
||||
* One segment represents at most one sector of uncompressed data.
|
||||
*
|
||||
* 2.1 Segment header
|
||||
* Fixed size. LZO_LEN (4) bytes long, LE32.
|
||||
* Records the total size of the segment (not including the header).
|
||||
* Segment header never crosses page boundary, thus it's possible to
|
||||
* have at most 3 padding zeros at the end of the page.
|
||||
* Segment header never crosses sector boundary, thus it's possible to
|
||||
* have at most 3 padding zeros at the end of the sector.
|
||||
*
|
||||
* 2.2 Data Payload
|
||||
* Variable size. Size up limit should be lzo1x_worst_compress(PAGE_SIZE)
|
||||
* which is 4419 for a 4KiB page.
|
||||
* Variable size. The upper size limit should be lzo1x_worst_compress(sectorsize)
|
||||
* which is 4419 for a 4KiB sectorsize.
|
||||
*
|
||||
* Example:
|
||||
* Example with 4K sectorsize:
|
||||
* Page 1:
|
||||
* 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10
|
||||
* 0x0000 | Header | SegHdr 01 | Data payload 01 ... |
|
||||
@@ -112,170 +112,174 @@ static inline size_t read_compress_length(const char *buf)
|
||||
return le32_to_cpu(dlen);
|
||||
}
|
||||
|
||||
/*
|
||||
* Will do:
|
||||
*
|
||||
* - Write a segment header into the destination
|
||||
* - Copy the compressed buffer into the destination
|
||||
* - Make sure we have enough space in the last sector to fit a segment header
|
||||
* If not, we will pad at most (LZO_LEN (4)) - 1 bytes of zeros.
|
||||
*
|
||||
* Will allocate new pages when needed.
|
||||
*/
|
||||
static int copy_compressed_data_to_page(char *compressed_data,
|
||||
size_t compressed_size,
|
||||
struct page **out_pages,
|
||||
u32 *cur_out,
|
||||
const u32 sectorsize)
|
||||
{
|
||||
u32 sector_bytes_left;
|
||||
u32 orig_out;
|
||||
struct page *cur_page;
|
||||
char *kaddr;
|
||||
|
||||
/*
|
||||
* We never allow a segment header crossing sector boundary, previous
|
||||
* run should ensure we have enough space left inside the sector.
|
||||
*/
|
||||
ASSERT((*cur_out / sectorsize) == (*cur_out + LZO_LEN - 1) / sectorsize);
|
||||
|
||||
cur_page = out_pages[*cur_out / PAGE_SIZE];
|
||||
/* Allocate a new page */
|
||||
if (!cur_page) {
|
||||
cur_page = alloc_page(GFP_NOFS);
|
||||
if (!cur_page)
|
||||
return -ENOMEM;
|
||||
out_pages[*cur_out / PAGE_SIZE] = cur_page;
|
||||
}
|
||||
|
||||
kaddr = kmap(cur_page);
|
||||
write_compress_length(kaddr + offset_in_page(*cur_out),
|
||||
compressed_size);
|
||||
*cur_out += LZO_LEN;
|
||||
|
||||
orig_out = *cur_out;
|
||||
|
||||
/* Copy compressed data */
|
||||
while (*cur_out - orig_out < compressed_size) {
|
||||
u32 copy_len = min_t(u32, sectorsize - *cur_out % sectorsize,
|
||||
orig_out + compressed_size - *cur_out);
|
||||
|
||||
kunmap(cur_page);
|
||||
cur_page = out_pages[*cur_out / PAGE_SIZE];
|
||||
/* Allocate a new page */
|
||||
if (!cur_page) {
|
||||
cur_page = alloc_page(GFP_NOFS);
|
||||
if (!cur_page)
|
||||
return -ENOMEM;
|
||||
out_pages[*cur_out / PAGE_SIZE] = cur_page;
|
||||
}
|
||||
kaddr = kmap(cur_page);
|
||||
|
||||
memcpy(kaddr + offset_in_page(*cur_out),
|
||||
compressed_data + *cur_out - orig_out, copy_len);
|
||||
|
||||
*cur_out += copy_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we can fit the next segment header into the remaining space
|
||||
* of the sector.
|
||||
*/
|
||||
sector_bytes_left = round_up(*cur_out, sectorsize) - *cur_out;
|
||||
if (sector_bytes_left >= LZO_LEN || sector_bytes_left == 0)
|
||||
goto out;
|
||||
|
||||
/* The remaining size is not enough, pad it with zeros */
|
||||
memset(kaddr + offset_in_page(*cur_out), 0,
|
||||
sector_bytes_left);
|
||||
*cur_out += sector_bytes_left;
|
||||
|
||||
out:
|
||||
kunmap(cur_page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
|
||||
u64 start, struct page **pages, unsigned long *out_pages,
|
||||
unsigned long *total_in, unsigned long *total_out)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
const u32 sectorsize = btrfs_sb(mapping->host->i_sb)->sectorsize;
|
||||
struct page *page_in = NULL;
|
||||
char *sizes_ptr;
|
||||
int ret = 0;
|
||||
char *data_in;
|
||||
char *cpage_out, *sizes_ptr;
|
||||
int nr_pages = 0;
|
||||
struct page *in_page = NULL;
|
||||
struct page *out_page = NULL;
|
||||
unsigned long bytes_left;
|
||||
unsigned long len = *total_out;
|
||||
unsigned long nr_dest_pages = *out_pages;
|
||||
const unsigned long max_out = nr_dest_pages * PAGE_SIZE;
|
||||
size_t in_len;
|
||||
size_t out_len;
|
||||
char *buf;
|
||||
unsigned long tot_in = 0;
|
||||
unsigned long tot_out = 0;
|
||||
unsigned long pg_bytes_left;
|
||||
unsigned long out_offset;
|
||||
unsigned long bytes;
|
||||
/* Points to the file offset of input data */
|
||||
u64 cur_in = start;
|
||||
/* Points to the current output byte */
|
||||
u32 cur_out = 0;
|
||||
u32 len = *total_out;
|
||||
|
||||
*out_pages = 0;
|
||||
*total_out = 0;
|
||||
*total_in = 0;
|
||||
|
||||
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
|
||||
data_in = kmap(in_page);
|
||||
|
||||
/*
|
||||
* store the size of all chunks of compressed data in
|
||||
* the first 4 bytes
|
||||
* Skip the header for now, we will later come back and write the total
|
||||
* compressed size
|
||||
*/
|
||||
out_page = alloc_page(GFP_NOFS);
|
||||
if (out_page == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
cpage_out = kmap(out_page);
|
||||
out_offset = LZO_LEN;
|
||||
tot_out = LZO_LEN;
|
||||
pages[0] = out_page;
|
||||
nr_pages = 1;
|
||||
pg_bytes_left = PAGE_SIZE - LZO_LEN;
|
||||
cur_out += LZO_LEN;
|
||||
while (cur_in < start + len) {
|
||||
char *data_in;
|
||||
const u32 sectorsize_mask = sectorsize - 1;
|
||||
u32 sector_off = (cur_in - start) & sectorsize_mask;
|
||||
u32 in_len;
|
||||
size_t out_len;
|
||||
|
||||
/* compress at most one page of data each time */
|
||||
in_len = min(len, PAGE_SIZE);
|
||||
while (tot_in < len) {
|
||||
ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
|
||||
&out_len, workspace->mem);
|
||||
if (ret != LZO_E_OK) {
|
||||
pr_debug("BTRFS: lzo in loop returned %d\n",
|
||||
ret);
|
||||
/* Get the input page first */
|
||||
if (!page_in) {
|
||||
page_in = find_get_page(mapping, cur_in >> PAGE_SHIFT);
|
||||
ASSERT(page_in);
|
||||
}
|
||||
|
||||
/* Compress at most one sector of data each time */
|
||||
in_len = min_t(u32, start + len - cur_in, sectorsize - sector_off);
|
||||
ASSERT(in_len);
|
||||
data_in = kmap(page_in);
|
||||
ret = lzo1x_1_compress(data_in +
|
||||
offset_in_page(cur_in), in_len,
|
||||
workspace->cbuf, &out_len,
|
||||
workspace->mem);
|
||||
kunmap(page_in);
|
||||
if (ret < 0) {
|
||||
pr_debug("BTRFS: lzo in loop returned %d\n", ret);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* store the size of this chunk of compressed data */
|
||||
write_compress_length(cpage_out + out_offset, out_len);
|
||||
tot_out += LZO_LEN;
|
||||
out_offset += LZO_LEN;
|
||||
pg_bytes_left -= LZO_LEN;
|
||||
ret = copy_compressed_data_to_page(workspace->cbuf, out_len,
|
||||
pages, &cur_out, sectorsize);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
tot_in += in_len;
|
||||
tot_out += out_len;
|
||||
cur_in += in_len;
|
||||
|
||||
/* copy bytes from the working buffer into the pages */
|
||||
buf = workspace->cbuf;
|
||||
while (out_len) {
|
||||
bytes = min_t(unsigned long, pg_bytes_left, out_len);
|
||||
|
||||
memcpy(cpage_out + out_offset, buf, bytes);
|
||||
|
||||
out_len -= bytes;
|
||||
pg_bytes_left -= bytes;
|
||||
buf += bytes;
|
||||
out_offset += bytes;
|
||||
|
||||
/*
|
||||
* we need another page for writing out.
|
||||
*
|
||||
* Note if there's less than 4 bytes left, we just
|
||||
* skip to a new page.
|
||||
*/
|
||||
if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
|
||||
pg_bytes_left == 0) {
|
||||
if (pg_bytes_left) {
|
||||
memset(cpage_out + out_offset, 0,
|
||||
pg_bytes_left);
|
||||
tot_out += pg_bytes_left;
|
||||
}
|
||||
|
||||
/* we're done, don't allocate new page */
|
||||
if (out_len == 0 && tot_in >= len)
|
||||
break;
|
||||
|
||||
kunmap(out_page);
|
||||
if (nr_pages == nr_dest_pages) {
|
||||
out_page = NULL;
|
||||
ret = -E2BIG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out_page = alloc_page(GFP_NOFS);
|
||||
if (out_page == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
cpage_out = kmap(out_page);
|
||||
pages[nr_pages++] = out_page;
|
||||
|
||||
pg_bytes_left = PAGE_SIZE;
|
||||
out_offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* we're making it bigger, give up */
|
||||
if (tot_in > 8192 && tot_in < tot_out) {
|
||||
/*
|
||||
* Check if we're making it bigger after two sectors. And if
|
||||
* it is so, give up.
|
||||
*/
|
||||
if (cur_in - start > sectorsize * 2 && cur_in - start < cur_out) {
|
||||
ret = -E2BIG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* we're all done */
|
||||
if (tot_in >= len)
|
||||
break;
|
||||
|
||||
if (tot_out > max_out)
|
||||
break;
|
||||
|
||||
bytes_left = len - tot_in;
|
||||
kunmap(in_page);
|
||||
put_page(in_page);
|
||||
|
||||
start += PAGE_SIZE;
|
||||
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
|
||||
data_in = kmap(in_page);
|
||||
in_len = min(bytes_left, PAGE_SIZE);
|
||||
/* Check if we have reached page boundary */
|
||||
if (IS_ALIGNED(cur_in, PAGE_SIZE)) {
|
||||
put_page(page_in);
|
||||
page_in = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (tot_out >= tot_in) {
|
||||
ret = -E2BIG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* store the size of all chunks of compressed data */
|
||||
/* Store the size of all chunks of compressed data */
|
||||
sizes_ptr = kmap_local_page(pages[0]);
|
||||
write_compress_length(sizes_ptr, tot_out);
|
||||
write_compress_length(sizes_ptr, cur_out);
|
||||
kunmap_local(sizes_ptr);
|
||||
|
||||
ret = 0;
|
||||
*total_out = tot_out;
|
||||
*total_in = tot_in;
|
||||
*total_out = cur_out;
|
||||
*total_in = cur_in - start;
|
||||
out:
|
||||
*out_pages = nr_pages;
|
||||
if (out_page)
|
||||
kunmap(out_page);
|
||||
|
||||
if (in_page) {
|
||||
kunmap(in_page);
|
||||
put_page(in_page);
|
||||
}
|
||||
|
||||
*out_pages = DIV_ROUND_UP(cur_out, PAGE_SIZE);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -357,9 +361,10 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
||||
ASSERT(cur_in / sectorsize ==
|
||||
(cur_in + LZO_LEN - 1) / sectorsize);
|
||||
cur_page = cb->compressed_pages[cur_in / PAGE_SIZE];
|
||||
kaddr = kmap(cur_page);
|
||||
ASSERT(cur_page);
|
||||
kaddr = kmap(cur_page);
|
||||
seg_len = read_compress_length(kaddr + offset_in_page(cur_in));
|
||||
kunmap(cur_page);
|
||||
cur_in += LZO_LEN;
|
||||
|
||||
/* Copy the compressed segment payload into workspace */
|
||||
|
||||
@@ -60,8 +60,7 @@ enum btrfs_rbio_ops {
|
||||
};
|
||||
|
||||
struct btrfs_raid_bio {
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct btrfs_bio *bbio;
|
||||
struct btrfs_io_context *bioc;
|
||||
|
||||
/* while we're doing rmw on a stripe
|
||||
* we put it into a hash table so we can
|
||||
@@ -192,7 +191,7 @@ static void scrub_parity_work(struct btrfs_work *work);
|
||||
static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
|
||||
{
|
||||
btrfs_init_work(&rbio->work, work_func, NULL, NULL);
|
||||
btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
|
||||
btrfs_queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -271,7 +270,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
|
||||
*/
|
||||
static int rbio_bucket(struct btrfs_raid_bio *rbio)
|
||||
{
|
||||
u64 num = rbio->bbio->raid_map[0];
|
||||
u64 num = rbio->bioc->raid_map[0];
|
||||
|
||||
/*
|
||||
* we shift down quite a bit. We're using byte
|
||||
@@ -345,7 +344,7 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
|
||||
if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
|
||||
return;
|
||||
|
||||
table = rbio->fs_info->stripe_hash_table;
|
||||
table = rbio->bioc->fs_info->stripe_hash_table;
|
||||
h = table->table + bucket;
|
||||
|
||||
/* hold the lock for the bucket because we may be
|
||||
@@ -400,7 +399,7 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
|
||||
if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
|
||||
return;
|
||||
|
||||
table = rbio->fs_info->stripe_hash_table;
|
||||
table = rbio->bioc->fs_info->stripe_hash_table;
|
||||
|
||||
spin_lock_irqsave(&table->cache_lock, flags);
|
||||
__remove_rbio_from_cache(rbio);
|
||||
@@ -460,7 +459,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
|
||||
if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
|
||||
return;
|
||||
|
||||
table = rbio->fs_info->stripe_hash_table;
|
||||
table = rbio->bioc->fs_info->stripe_hash_table;
|
||||
|
||||
spin_lock_irqsave(&table->cache_lock, flags);
|
||||
spin_lock(&rbio->bio_list_lock);
|
||||
@@ -559,8 +558,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
|
||||
test_bit(RBIO_CACHE_BIT, &cur->flags))
|
||||
return 0;
|
||||
|
||||
if (last->bbio->raid_map[0] !=
|
||||
cur->bbio->raid_map[0])
|
||||
if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
|
||||
return 0;
|
||||
|
||||
/* we can't merge with different operations */
|
||||
@@ -669,11 +667,11 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
|
||||
struct btrfs_raid_bio *cache_drop = NULL;
|
||||
int ret = 0;
|
||||
|
||||
h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
|
||||
h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
|
||||
|
||||
spin_lock_irqsave(&h->lock, flags);
|
||||
list_for_each_entry(cur, &h->hash_list, hash_list) {
|
||||
if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0])
|
||||
if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
|
||||
continue;
|
||||
|
||||
spin_lock(&cur->bio_list_lock);
|
||||
@@ -751,7 +749,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
|
||||
int keep_cache = 0;
|
||||
|
||||
bucket = rbio_bucket(rbio);
|
||||
h = rbio->fs_info->stripe_hash_table->table + bucket;
|
||||
h = rbio->bioc->fs_info->stripe_hash_table->table + bucket;
|
||||
|
||||
if (list_empty(&rbio->plug_list))
|
||||
cache_rbio(rbio);
|
||||
@@ -838,7 +836,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
}
|
||||
|
||||
btrfs_put_bbio(rbio->bbio);
|
||||
btrfs_put_bioc(rbio->bioc);
|
||||
kfree(rbio);
|
||||
}
|
||||
|
||||
@@ -865,7 +863,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
|
||||
struct bio *extra;
|
||||
|
||||
if (rbio->generic_bio_cnt)
|
||||
btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
|
||||
btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);
|
||||
|
||||
/*
|
||||
* At this moment, rbio->bio_list is empty, however since rbio does not
|
||||
@@ -906,7 +904,7 @@ static void raid_write_end_io(struct bio *bio)
|
||||
|
||||
/* OK, we have read all the stripes we need to. */
|
||||
max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
|
||||
0 : rbio->bbio->max_errors;
|
||||
0 : rbio->bioc->max_errors;
|
||||
if (atomic_read(&rbio->error) > max_errors)
|
||||
err = BLK_STS_IOERR;
|
||||
|
||||
@@ -961,12 +959,12 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
|
||||
* this does not allocate any pages for rbio->pages.
|
||||
*/
|
||||
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_bio *bbio,
|
||||
struct btrfs_io_context *bioc,
|
||||
u64 stripe_len)
|
||||
{
|
||||
struct btrfs_raid_bio *rbio;
|
||||
int nr_data = 0;
|
||||
int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
|
||||
int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
|
||||
int num_pages = rbio_nr_pages(stripe_len, real_stripes);
|
||||
int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
|
||||
void *p;
|
||||
@@ -987,8 +985,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
|
||||
spin_lock_init(&rbio->bio_list_lock);
|
||||
INIT_LIST_HEAD(&rbio->stripe_cache);
|
||||
INIT_LIST_HEAD(&rbio->hash_list);
|
||||
rbio->bbio = bbio;
|
||||
rbio->fs_info = fs_info;
|
||||
rbio->bioc = bioc;
|
||||
rbio->stripe_len = stripe_len;
|
||||
rbio->nr_pages = num_pages;
|
||||
rbio->real_stripes = real_stripes;
|
||||
@@ -1015,9 +1012,9 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
|
||||
CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
|
||||
#undef CONSUME_ALLOC
|
||||
|
||||
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
|
||||
if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
|
||||
nr_data = real_stripes - 1;
|
||||
else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
|
||||
else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
|
||||
nr_data = real_stripes - 2;
|
||||
else
|
||||
BUG();
|
||||
@@ -1077,10 +1074,10 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
|
||||
struct bio *last = bio_list->tail;
|
||||
int ret;
|
||||
struct bio *bio;
|
||||
struct btrfs_bio_stripe *stripe;
|
||||
struct btrfs_io_stripe *stripe;
|
||||
u64 disk_start;
|
||||
|
||||
stripe = &rbio->bbio->stripes[stripe_nr];
|
||||
stripe = &rbio->bioc->stripes[stripe_nr];
|
||||
disk_start = stripe->physical + (page_index << PAGE_SHIFT);
|
||||
|
||||
/* if the device is missing, just fail this stripe */
|
||||
@@ -1105,8 +1102,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
|
||||
}
|
||||
|
||||
/* put a new bio on the list */
|
||||
bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
|
||||
btrfs_io_bio(bio)->device = stripe->dev;
|
||||
bio = btrfs_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
|
||||
btrfs_bio(bio)->device = stripe->dev;
|
||||
bio->bi_iter.bi_size = 0;
|
||||
bio_set_dev(bio, stripe->dev->bdev);
|
||||
bio->bi_iter.bi_sector = disk_start >> 9;
|
||||
@@ -1155,11 +1152,11 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
|
||||
int i = 0;
|
||||
|
||||
start = bio->bi_iter.bi_sector << 9;
|
||||
stripe_offset = start - rbio->bbio->raid_map[0];
|
||||
stripe_offset = start - rbio->bioc->raid_map[0];
|
||||
page_index = stripe_offset >> PAGE_SHIFT;
|
||||
|
||||
if (bio_flagged(bio, BIO_CLONED))
|
||||
bio->bi_iter = btrfs_io_bio(bio)->iter;
|
||||
bio->bi_iter = btrfs_bio(bio)->iter;
|
||||
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
rbio->bio_pages[page_index + i] = bvec.bv_page;
|
||||
@@ -1179,7 +1176,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
|
||||
*/
|
||||
static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
|
||||
{
|
||||
struct btrfs_bio *bbio = rbio->bbio;
|
||||
struct btrfs_io_context *bioc = rbio->bioc;
|
||||
void **pointers = rbio->finish_pointers;
|
||||
int nr_data = rbio->nr_data;
|
||||
int stripe;
|
||||
@@ -1284,11 +1281,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(!bbio->num_tgtdevs))
|
||||
if (likely(!bioc->num_tgtdevs))
|
||||
goto write_data;
|
||||
|
||||
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
|
||||
if (!bbio->tgtdev_map[stripe])
|
||||
if (!bioc->tgtdev_map[stripe])
|
||||
continue;
|
||||
|
||||
for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
|
||||
@@ -1302,7 +1299,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
|
||||
ret = rbio_add_io_page(rbio, &bio_list, page,
|
||||
rbio->bbio->tgtdev_map[stripe],
|
||||
rbio->bioc->tgtdev_map[stripe],
|
||||
pagenr, rbio->stripe_len);
|
||||
if (ret)
|
||||
goto cleanup;
|
||||
@@ -1339,12 +1336,12 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
|
||||
{
|
||||
u64 physical = bio->bi_iter.bi_sector;
|
||||
int i;
|
||||
struct btrfs_bio_stripe *stripe;
|
||||
struct btrfs_io_stripe *stripe;
|
||||
|
||||
physical <<= 9;
|
||||
|
||||
for (i = 0; i < rbio->bbio->num_stripes; i++) {
|
||||
stripe = &rbio->bbio->stripes[i];
|
||||
for (i = 0; i < rbio->bioc->num_stripes; i++) {
|
||||
stripe = &rbio->bioc->stripes[i];
|
||||
if (in_range(physical, stripe->physical, rbio->stripe_len) &&
|
||||
stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) {
|
||||
return i;
|
||||
@@ -1365,7 +1362,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
|
||||
int i;
|
||||
|
||||
for (i = 0; i < rbio->nr_data; i++) {
|
||||
u64 stripe_start = rbio->bbio->raid_map[i];
|
||||
u64 stripe_start = rbio->bioc->raid_map[i];
|
||||
|
||||
if (in_range(logical, stripe_start, rbio->stripe_len))
|
||||
return i;
|
||||
@@ -1456,7 +1453,7 @@ static void raid_rmw_end_io(struct bio *bio)
|
||||
if (!atomic_dec_and_test(&rbio->stripes_pending))
|
||||
return;
|
||||
|
||||
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
|
||||
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
@@ -1538,8 +1535,8 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
|
||||
/*
|
||||
* the bbio may be freed once we submit the last bio. Make sure
|
||||
* not to touch it after that
|
||||
* The bioc may be freed once we submit the last bio. Make sure not to
|
||||
* touch it after that.
|
||||
*/
|
||||
atomic_set(&rbio->stripes_pending, bios_to_read);
|
||||
while ((bio = bio_list_pop(&bio_list))) {
|
||||
@@ -1547,7 +1544,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
|
||||
bio->bi_end_io = raid_rmw_end_io;
|
||||
bio->bi_opf = REQ_OP_READ;
|
||||
|
||||
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
|
||||
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
|
||||
|
||||
submit_bio(bio);
|
||||
}
|
||||
@@ -1719,17 +1716,18 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||
/*
|
||||
* our main entry point for writes from the rest of the FS.
|
||||
*/
|
||||
int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
struct btrfs_bio *bbio, u64 stripe_len)
|
||||
int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc,
|
||||
u64 stripe_len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bioc->fs_info;
|
||||
struct btrfs_raid_bio *rbio;
|
||||
struct btrfs_plug_cb *plug = NULL;
|
||||
struct blk_plug_cb *cb;
|
||||
int ret;
|
||||
|
||||
rbio = alloc_rbio(fs_info, bbio, stripe_len);
|
||||
rbio = alloc_rbio(fs_info, bioc, stripe_len);
|
||||
if (IS_ERR(rbio)) {
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
return PTR_ERR(rbio);
|
||||
}
|
||||
bio_list_add(&rbio->bio_list, bio);
|
||||
@@ -1842,7 +1840,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
|
||||
/* all raid6 handling here */
|
||||
if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
|
||||
if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) {
|
||||
/*
|
||||
* single failure, rebuild from parity raid5
|
||||
* style
|
||||
@@ -1874,8 +1872,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
|
||||
* here due to a crc mismatch and we can't give them the
|
||||
* data they want
|
||||
*/
|
||||
if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
|
||||
if (rbio->bbio->raid_map[faila] ==
|
||||
if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
|
||||
if (rbio->bioc->raid_map[faila] ==
|
||||
RAID5_P_STRIPE) {
|
||||
err = BLK_STS_IOERR;
|
||||
goto cleanup;
|
||||
@@ -1887,7 +1885,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
|
||||
goto pstripe;
|
||||
}
|
||||
|
||||
if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
|
||||
if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
|
||||
raid6_datap_recov(rbio->real_stripes,
|
||||
PAGE_SIZE, faila, pointers);
|
||||
} else {
|
||||
@@ -2006,7 +2004,7 @@ static void raid_recover_end_io(struct bio *bio)
|
||||
if (!atomic_dec_and_test(&rbio->stripes_pending))
|
||||
return;
|
||||
|
||||
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
|
||||
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
|
||||
rbio_orig_end_io(rbio, BLK_STS_IOERR);
|
||||
else
|
||||
__raid_recover_end_io(rbio);
|
||||
@@ -2074,7 +2072,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
|
||||
* were up to date, or we might have no bios to read because
|
||||
* the devices were gone.
|
||||
*/
|
||||
if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
|
||||
if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
|
||||
__raid_recover_end_io(rbio);
|
||||
return 0;
|
||||
} else {
|
||||
@@ -2083,8 +2081,8 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
|
||||
/*
|
||||
* the bbio may be freed once we submit the last bio. Make sure
|
||||
* not to touch it after that
|
||||
* The bioc may be freed once we submit the last bio. Make sure not to
|
||||
* touch it after that.
|
||||
*/
|
||||
atomic_set(&rbio->stripes_pending, bios_to_read);
|
||||
while ((bio = bio_list_pop(&bio_list))) {
|
||||
@@ -2092,7 +2090,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
|
||||
bio->bi_end_io = raid_recover_end_io;
|
||||
bio->bi_opf = REQ_OP_READ;
|
||||
|
||||
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
|
||||
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
|
||||
|
||||
submit_bio(bio);
|
||||
}
|
||||
@@ -2116,22 +2114,22 @@ cleanup:
|
||||
* so we assume the bio they send down corresponds to a failed part
|
||||
* of the drive.
|
||||
*/
|
||||
int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
struct btrfs_bio *bbio, u64 stripe_len,
|
||||
int mirror_num, int generic_io)
|
||||
int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
|
||||
u64 stripe_len, int mirror_num, int generic_io)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bioc->fs_info;
|
||||
struct btrfs_raid_bio *rbio;
|
||||
int ret;
|
||||
|
||||
if (generic_io) {
|
||||
ASSERT(bbio->mirror_num == mirror_num);
|
||||
btrfs_io_bio(bio)->mirror_num = mirror_num;
|
||||
ASSERT(bioc->mirror_num == mirror_num);
|
||||
btrfs_bio(bio)->mirror_num = mirror_num;
|
||||
}
|
||||
|
||||
rbio = alloc_rbio(fs_info, bbio, stripe_len);
|
||||
rbio = alloc_rbio(fs_info, bioc, stripe_len);
|
||||
if (IS_ERR(rbio)) {
|
||||
if (generic_io)
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
return PTR_ERR(rbio);
|
||||
}
|
||||
|
||||
@@ -2142,11 +2140,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
rbio->faila = find_logical_bio_stripe(rbio, bio);
|
||||
if (rbio->faila == -1) {
|
||||
btrfs_warn(fs_info,
|
||||
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)",
|
||||
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
|
||||
__func__, bio->bi_iter.bi_sector << 9,
|
||||
(u64)bio->bi_iter.bi_size, bbio->map_type);
|
||||
(u64)bio->bi_iter.bi_size, bioc->map_type);
|
||||
if (generic_io)
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
kfree(rbio);
|
||||
return -EIO;
|
||||
}
|
||||
@@ -2155,7 +2153,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
btrfs_bio_counter_inc_noblocked(fs_info);
|
||||
rbio->generic_bio_cnt = 1;
|
||||
} else {
|
||||
btrfs_get_bbio(bbio);
|
||||
btrfs_get_bioc(bioc);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2214,23 +2212,23 @@ static void read_rebuild_work(struct btrfs_work *work)
|
||||
/*
|
||||
* The following code is used to scrub/replace the parity stripe
|
||||
*
|
||||
* Caller must have already increased bio_counter for getting @bbio.
|
||||
* Caller must have already increased bio_counter for getting @bioc.
|
||||
*
|
||||
* Note: We need make sure all the pages that add into the scrub/replace
|
||||
* raid bio are correct and not be changed during the scrub/replace. That
|
||||
* is those pages just hold metadata or file data with checksum.
|
||||
*/
|
||||
|
||||
struct btrfs_raid_bio *
|
||||
raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
struct btrfs_bio *bbio, u64 stripe_len,
|
||||
struct btrfs_device *scrub_dev,
|
||||
unsigned long *dbitmap, int stripe_nsectors)
|
||||
struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
|
||||
struct btrfs_io_context *bioc,
|
||||
u64 stripe_len, struct btrfs_device *scrub_dev,
|
||||
unsigned long *dbitmap, int stripe_nsectors)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bioc->fs_info;
|
||||
struct btrfs_raid_bio *rbio;
|
||||
int i;
|
||||
|
||||
rbio = alloc_rbio(fs_info, bbio, stripe_len);
|
||||
rbio = alloc_rbio(fs_info, bioc, stripe_len);
|
||||
if (IS_ERR(rbio))
|
||||
return NULL;
|
||||
bio_list_add(&rbio->bio_list, bio);
|
||||
@@ -2242,12 +2240,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
|
||||
|
||||
/*
|
||||
* After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
|
||||
* After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted
|
||||
* to the end position, so this search can start from the first parity
|
||||
* stripe.
|
||||
*/
|
||||
for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
|
||||
if (bbio->stripes[i].dev == scrub_dev) {
|
||||
if (bioc->stripes[i].dev == scrub_dev) {
|
||||
rbio->scrubp = i;
|
||||
break;
|
||||
}
|
||||
@@ -2260,7 +2258,7 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
|
||||
|
||||
/*
|
||||
* We have already increased bio_counter when getting bbio, record it
|
||||
* We have already increased bio_counter when getting bioc, record it
|
||||
* so we can free it at rbio_orig_end_io().
|
||||
*/
|
||||
rbio->generic_bio_cnt = 1;
|
||||
@@ -2275,10 +2273,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
|
||||
int stripe_offset;
|
||||
int index;
|
||||
|
||||
ASSERT(logical >= rbio->bbio->raid_map[0]);
|
||||
ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
|
||||
ASSERT(logical >= rbio->bioc->raid_map[0]);
|
||||
ASSERT(logical + PAGE_SIZE <= rbio->bioc->raid_map[0] +
|
||||
rbio->stripe_len * rbio->nr_data);
|
||||
stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
|
||||
stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
|
||||
index = stripe_offset >> PAGE_SHIFT;
|
||||
rbio->bio_pages[index] = page;
|
||||
}
|
||||
@@ -2312,7 +2310,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
|
||||
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
|
||||
int need_check)
|
||||
{
|
||||
struct btrfs_bio *bbio = rbio->bbio;
|
||||
struct btrfs_io_context *bioc = rbio->bioc;
|
||||
void **pointers = rbio->finish_pointers;
|
||||
unsigned long *pbitmap = rbio->finish_pbitmap;
|
||||
int nr_data = rbio->nr_data;
|
||||
@@ -2335,7 +2333,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
|
||||
else
|
||||
BUG();
|
||||
|
||||
if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
|
||||
if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) {
|
||||
is_replace = 1;
|
||||
bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
|
||||
}
|
||||
@@ -2435,7 +2433,7 @@ writeback:
|
||||
|
||||
page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
|
||||
ret = rbio_add_io_page(rbio, &bio_list, page,
|
||||
bbio->tgtdev_map[rbio->scrubp],
|
||||
bioc->tgtdev_map[rbio->scrubp],
|
||||
pagenr, rbio->stripe_len);
|
||||
if (ret)
|
||||
goto cleanup;
|
||||
@@ -2483,7 +2481,7 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
|
||||
*/
|
||||
static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
|
||||
{
|
||||
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
|
||||
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
|
||||
goto cleanup;
|
||||
|
||||
if (rbio->faila >= 0 || rbio->failb >= 0) {
|
||||
@@ -2504,7 +2502,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
|
||||
* the data, so the capability of the repair is declined.
|
||||
* (In the case of RAID5, we can not repair anything)
|
||||
*/
|
||||
if (dfail > rbio->bbio->max_errors - 1)
|
||||
if (dfail > rbio->bioc->max_errors - 1)
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
@@ -2625,8 +2623,8 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
|
||||
}
|
||||
|
||||
/*
|
||||
* the bbio may be freed once we submit the last bio. Make sure
|
||||
* not to touch it after that
|
||||
* The bioc may be freed once we submit the last bio. Make sure not to
|
||||
* touch it after that.
|
||||
*/
|
||||
atomic_set(&rbio->stripes_pending, bios_to_read);
|
||||
while ((bio = bio_list_pop(&bio_list))) {
|
||||
@@ -2634,7 +2632,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
|
||||
bio->bi_end_io = raid56_parity_scrub_end_io;
|
||||
bio->bi_opf = REQ_OP_READ;
|
||||
|
||||
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
|
||||
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
|
||||
|
||||
submit_bio(bio);
|
||||
}
|
||||
@@ -2670,12 +2668,13 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
|
||||
/* The following code is used for dev replace of a missing RAID 5/6 device. */
|
||||
|
||||
struct btrfs_raid_bio *
|
||||
raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
struct btrfs_bio *bbio, u64 length)
|
||||
raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
|
||||
u64 length)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bioc->fs_info;
|
||||
struct btrfs_raid_bio *rbio;
|
||||
|
||||
rbio = alloc_rbio(fs_info, bbio, length);
|
||||
rbio = alloc_rbio(fs_info, bioc, length);
|
||||
if (IS_ERR(rbio))
|
||||
return NULL;
|
||||
|
||||
@@ -2695,7 +2694,7 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
}
|
||||
|
||||
/*
|
||||
* When we get bbio, we have already increased bio_counter, record it
|
||||
* When we get bioc, we have already increased bio_counter, record it
|
||||
* so we can free it at rbio_orig_end_io()
|
||||
*/
|
||||
rbio->generic_bio_cnt = 1;
|
||||
|
||||
@@ -30,25 +30,23 @@ static inline int nr_data_stripes(const struct map_lookup *map)
|
||||
struct btrfs_raid_bio;
|
||||
struct btrfs_device;
|
||||
|
||||
int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
struct btrfs_bio *bbio, u64 stripe_len,
|
||||
int mirror_num, int generic_io);
|
||||
int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
struct btrfs_bio *bbio, u64 stripe_len);
|
||||
int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
|
||||
u64 stripe_len, int mirror_num, int generic_io);
|
||||
int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc,
|
||||
u64 stripe_len);
|
||||
|
||||
void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
|
||||
u64 logical);
|
||||
|
||||
struct btrfs_raid_bio *
|
||||
raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
struct btrfs_bio *bbio, u64 stripe_len,
|
||||
struct btrfs_device *scrub_dev,
|
||||
unsigned long *dbitmap, int stripe_nsectors);
|
||||
struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
|
||||
struct btrfs_io_context *bioc, u64 stripe_len,
|
||||
struct btrfs_device *scrub_dev,
|
||||
unsigned long *dbitmap, int stripe_nsectors);
|
||||
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
|
||||
|
||||
struct btrfs_raid_bio *
|
||||
raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
struct btrfs_bio *bbio, u64 length);
|
||||
raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
|
||||
u64 length);
|
||||
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
|
||||
|
||||
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
|
||||
|
||||
@@ -227,7 +227,7 @@ start_machine:
|
||||
}
|
||||
|
||||
static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
|
||||
struct btrfs_bio *bbio)
|
||||
struct btrfs_io_context *bioc)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = dev->fs_info;
|
||||
int ret;
|
||||
@@ -275,11 +275,11 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
|
||||
kref_init(&zone->refcnt);
|
||||
zone->elems = 0;
|
||||
zone->device = dev; /* our device always sits at index 0 */
|
||||
for (i = 0; i < bbio->num_stripes; ++i) {
|
||||
for (i = 0; i < bioc->num_stripes; ++i) {
|
||||
/* bounds have already been checked */
|
||||
zone->devs[i] = bbio->stripes[i].dev;
|
||||
zone->devs[i] = bioc->stripes[i].dev;
|
||||
}
|
||||
zone->ndevs = bbio->num_stripes;
|
||||
zone->ndevs = bioc->num_stripes;
|
||||
|
||||
spin_lock(&fs_info->reada_lock);
|
||||
ret = radix_tree_insert(&dev->reada_zones,
|
||||
@@ -309,7 +309,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
int ret;
|
||||
struct reada_extent *re = NULL;
|
||||
struct reada_extent *re_exist = NULL;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
struct btrfs_device *dev;
|
||||
struct btrfs_device *prev_dev;
|
||||
u64 length;
|
||||
@@ -345,28 +345,28 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
length = fs_info->nodesize;
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
|
||||
&length, &bbio, 0);
|
||||
if (ret || !bbio || length < fs_info->nodesize)
|
||||
&length, &bioc, 0);
|
||||
if (ret || !bioc || length < fs_info->nodesize)
|
||||
goto error;
|
||||
|
||||
if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
|
||||
if (bioc->num_stripes > BTRFS_MAX_MIRRORS) {
|
||||
btrfs_err(fs_info,
|
||||
"readahead: more than %d copies not supported",
|
||||
BTRFS_MAX_MIRRORS);
|
||||
goto error;
|
||||
}
|
||||
|
||||
real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
|
||||
real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
|
||||
for (nzones = 0; nzones < real_stripes; ++nzones) {
|
||||
struct reada_zone *zone;
|
||||
|
||||
dev = bbio->stripes[nzones].dev;
|
||||
dev = bioc->stripes[nzones].dev;
|
||||
|
||||
/* cannot read ahead on missing device. */
|
||||
if (!dev->bdev)
|
||||
continue;
|
||||
|
||||
zone = reada_find_zone(dev, logical, bbio);
|
||||
zone = reada_find_zone(dev, logical, bioc);
|
||||
if (!zone)
|
||||
continue;
|
||||
|
||||
@@ -464,7 +464,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
if (!have_zone)
|
||||
goto error;
|
||||
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
return re;
|
||||
|
||||
error:
|
||||
@@ -488,7 +488,7 @@ error:
|
||||
kref_put(&zone->refcnt, reada_zone_release);
|
||||
spin_unlock(&fs_info->reada_lock);
|
||||
}
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
kfree(re);
|
||||
return re_exist;
|
||||
}
|
||||
|
||||
@@ -678,10 +678,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
|
||||
|
||||
if (generic_ref->type == BTRFS_REF_METADATA) {
|
||||
if (!parent)
|
||||
ref_root = generic_ref->tree_ref.root;
|
||||
ref_root = generic_ref->tree_ref.owning_root;
|
||||
owner = generic_ref->tree_ref.level;
|
||||
} else if (!parent) {
|
||||
ref_root = generic_ref->data_ref.ref_root;
|
||||
ref_root = generic_ref->data_ref.owning_root;
|
||||
owner = generic_ref->data_ref.ino;
|
||||
offset = generic_ref->data_ref.offset;
|
||||
}
|
||||
|
||||
@@ -138,7 +138,7 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
|
||||
}
|
||||
|
||||
btrfs_page_set_uptodate(fs_info, page, file_offset, block_size);
|
||||
ClearPageChecked(page);
|
||||
btrfs_page_clear_checked(fs_info, page, file_offset, block_size);
|
||||
btrfs_page_set_dirty(fs_info, page, file_offset, block_size);
|
||||
out_unlock:
|
||||
if (page) {
|
||||
@@ -649,7 +649,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
|
||||
static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
|
||||
struct inode *dst, u64 dst_loff)
|
||||
{
|
||||
int ret;
|
||||
int ret = 0;
|
||||
u64 i, tail_len, chunk_count;
|
||||
struct btrfs_root *root_dst = BTRFS_I(dst)->root;
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "backref.h"
|
||||
#include "misc.h"
|
||||
#include "subpage.h"
|
||||
#include "zoned.h"
|
||||
|
||||
/*
|
||||
* Relocation overview
|
||||
@@ -1145,9 +1146,9 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
||||
key.offset -= btrfs_file_extent_offset(leaf, fi);
|
||||
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
|
||||
num_bytes, parent);
|
||||
ref.real_root = root->root_key.objectid;
|
||||
btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
|
||||
key.objectid, key.offset);
|
||||
key.objectid, key.offset,
|
||||
root->root_key.objectid, false);
|
||||
ret = btrfs_inc_extent_ref(trans, &ref);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@@ -1156,9 +1157,9 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
||||
|
||||
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
|
||||
num_bytes, parent);
|
||||
ref.real_root = root->root_key.objectid;
|
||||
btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
|
||||
key.objectid, key.offset);
|
||||
key.objectid, key.offset,
|
||||
root->root_key.objectid, false);
|
||||
ret = btrfs_free_extent(trans, &ref);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@@ -1367,8 +1368,8 @@ again:
|
||||
|
||||
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
|
||||
blocksize, path->nodes[level]->start);
|
||||
ref.skip_qgroup = true;
|
||||
btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
|
||||
btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
|
||||
0, true);
|
||||
ret = btrfs_inc_extent_ref(trans, &ref);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@@ -1376,8 +1377,8 @@ again:
|
||||
}
|
||||
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
|
||||
blocksize, 0);
|
||||
ref.skip_qgroup = true;
|
||||
btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
|
||||
btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0,
|
||||
true);
|
||||
ret = btrfs_inc_extent_ref(trans, &ref);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@@ -1386,8 +1387,8 @@ again:
|
||||
|
||||
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
|
||||
blocksize, path->nodes[level]->start);
|
||||
btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
|
||||
ref.skip_qgroup = true;
|
||||
btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
|
||||
0, true);
|
||||
ret = btrfs_free_extent(trans, &ref);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@@ -1396,8 +1397,8 @@ again:
|
||||
|
||||
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
|
||||
blocksize, 0);
|
||||
btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
|
||||
ref.skip_qgroup = true;
|
||||
btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid,
|
||||
0, true);
|
||||
ret = btrfs_free_extent(trans, &ref);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@@ -2473,9 +2474,9 @@ static int do_relocation(struct btrfs_trans_handle *trans,
|
||||
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
|
||||
node->eb->start, blocksize,
|
||||
upper->eb->start);
|
||||
ref.real_root = root->root_key.objectid;
|
||||
btrfs_init_tree_ref(&ref, node->level,
|
||||
btrfs_header_owner(upper->eb));
|
||||
btrfs_header_owner(upper->eb),
|
||||
root->root_key.objectid, false);
|
||||
ret = btrfs_inc_extent_ref(trans, &ref);
|
||||
if (!ret)
|
||||
ret = btrfs_drop_subtree(trans, root, eb,
|
||||
@@ -2691,8 +2692,12 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
|
||||
list_add_tail(&node->list, &rc->backref_cache.changed);
|
||||
} else {
|
||||
path->lowest_level = node->level;
|
||||
if (root == root->fs_info->chunk_root)
|
||||
btrfs_reserve_chunk_metadata(trans, false);
|
||||
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
|
||||
btrfs_release_path(path);
|
||||
if (root == root->fs_info->chunk_root)
|
||||
btrfs_trans_release_chunk_metadata(trans);
|
||||
if (ret > 0)
|
||||
ret = 0;
|
||||
}
|
||||
@@ -2852,31 +2857,6 @@ static noinline_for_stack int prealloc_file_extent_cluster(
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* On a zoned filesystem, we cannot preallocate the file region.
|
||||
* Instead, we dirty and fiemap_write the region.
|
||||
*/
|
||||
if (btrfs_is_zoned(inode->root->fs_info)) {
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
end = cluster->end - offset + 1;
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
|
||||
inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
|
||||
i_size_write(&inode->vfs_inode, end);
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return btrfs_end_transaction(trans);
|
||||
}
|
||||
|
||||
btrfs_inode_lock(&inode->vfs_inode, 0);
|
||||
for (nr = 0; nr < cluster->nr; nr++) {
|
||||
start = cluster->boundary[nr] - offset;
|
||||
@@ -2903,9 +2883,8 @@ static noinline_for_stack int prealloc_file_extent_cluster(
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline_for_stack
|
||||
int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
|
||||
u64 block_start)
|
||||
static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode,
|
||||
u64 start, u64 end, u64 block_start)
|
||||
{
|
||||
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
struct extent_map *em;
|
||||
@@ -3084,7 +3063,6 @@ release_page:
|
||||
static int relocate_file_extent_cluster(struct inode *inode,
|
||||
struct file_extent_cluster *cluster)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
u64 offset = BTRFS_I(inode)->index_cnt;
|
||||
unsigned long index;
|
||||
unsigned long last_index;
|
||||
@@ -3105,7 +3083,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
|
||||
|
||||
file_ra_state_init(ra, inode->i_mapping);
|
||||
|
||||
ret = setup_extent_mapping(inode, cluster->start - offset,
|
||||
ret = setup_relocation_extent_mapping(inode, cluster->start - offset,
|
||||
cluster->end - offset, cluster->start);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -3114,8 +3092,6 @@ static int relocate_file_extent_cluster(struct inode *inode,
|
||||
for (index = (cluster->start - offset) >> PAGE_SHIFT;
|
||||
index <= last_index && !ret; index++)
|
||||
ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
|
||||
if (btrfs_is_zoned(fs_info) && !ret)
|
||||
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
|
||||
if (ret == 0)
|
||||
WARN_ON(cluster_nr != cluster->nr);
|
||||
out:
|
||||
@@ -3770,12 +3746,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_inode_item *item;
|
||||
struct extent_buffer *leaf;
|
||||
u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
|
||||
int ret;
|
||||
|
||||
if (btrfs_is_zoned(trans->fs_info))
|
||||
flags &= ~BTRFS_INODE_PREALLOC;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
@@ -3790,7 +3762,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
|
||||
btrfs_set_inode_generation(leaf, item, 1);
|
||||
btrfs_set_inode_size(leaf, item, 0);
|
||||
btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
|
||||
btrfs_set_inode_flags(leaf, item, flags);
|
||||
btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
|
||||
BTRFS_INODE_PREALLOC);
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
@@ -4063,6 +4036,9 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
|
||||
rc->block_group->start,
|
||||
rc->block_group->length);
|
||||
|
||||
ret = btrfs_zone_finish(rc->block_group);
|
||||
WARN_ON(ret && ret != -EAGAIN);
|
||||
|
||||
while (1) {
|
||||
int finishes_stage;
|
||||
|
||||
@@ -4386,8 +4362,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
|
||||
if (!rc)
|
||||
return 0;
|
||||
|
||||
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
|
||||
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
|
||||
BUG_ON(rc->stage == UPDATE_DATA_PTRS && btrfs_is_data_reloc_root(root));
|
||||
|
||||
level = btrfs_header_level(buf);
|
||||
if (btrfs_header_generation(buf) <=
|
||||
|
||||
139
fs/btrfs/scrub.c
139
fs/btrfs/scrub.c
@@ -57,7 +57,7 @@ struct scrub_ctx;
|
||||
|
||||
struct scrub_recover {
|
||||
refcount_t refs;
|
||||
struct btrfs_bio *bbio;
|
||||
struct btrfs_io_context *bioc;
|
||||
u64 map_length;
|
||||
};
|
||||
|
||||
@@ -254,7 +254,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx);
|
||||
static inline int scrub_is_page_on_raid56(struct scrub_page *spage)
|
||||
{
|
||||
return spage->recover &&
|
||||
(spage->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
|
||||
(spage->recover->bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
|
||||
}
|
||||
|
||||
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
|
||||
@@ -798,7 +798,7 @@ static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
|
||||
{
|
||||
if (refcount_dec_and_test(&recover->refs)) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
btrfs_put_bbio(recover->bbio);
|
||||
btrfs_put_bioc(recover->bioc);
|
||||
kfree(recover);
|
||||
}
|
||||
}
|
||||
@@ -1027,8 +1027,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
|
||||
sblock_other = sblocks_for_recheck + mirror_index;
|
||||
} else {
|
||||
struct scrub_recover *r = sblock_bad->pagev[0]->recover;
|
||||
int max_allowed = r->bbio->num_stripes -
|
||||
r->bbio->num_tgtdevs;
|
||||
int max_allowed = r->bioc->num_stripes - r->bioc->num_tgtdevs;
|
||||
|
||||
if (mirror_index >= max_allowed)
|
||||
break;
|
||||
@@ -1218,14 +1217,14 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
|
||||
static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
|
||||
{
|
||||
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
|
||||
if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
|
||||
return 2;
|
||||
else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
|
||||
else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
|
||||
return 3;
|
||||
else
|
||||
return (int)bbio->num_stripes;
|
||||
return (int)bioc->num_stripes;
|
||||
}
|
||||
|
||||
static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
|
||||
@@ -1269,7 +1268,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
|
||||
u64 flags = original_sblock->pagev[0]->flags;
|
||||
u64 have_csum = original_sblock->pagev[0]->have_csum;
|
||||
struct scrub_recover *recover;
|
||||
struct btrfs_bio *bbio;
|
||||
struct btrfs_io_context *bioc;
|
||||
u64 sublen;
|
||||
u64 mapped_length;
|
||||
u64 stripe_offset;
|
||||
@@ -1288,7 +1287,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
|
||||
while (length > 0) {
|
||||
sublen = min_t(u64, length, fs_info->sectorsize);
|
||||
mapped_length = sublen;
|
||||
bbio = NULL;
|
||||
bioc = NULL;
|
||||
|
||||
/*
|
||||
* With a length of sectorsize, each returned stripe represents
|
||||
@@ -1296,27 +1295,27 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
|
||||
*/
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
|
||||
logical, &mapped_length, &bbio);
|
||||
if (ret || !bbio || mapped_length < sublen) {
|
||||
btrfs_put_bbio(bbio);
|
||||
logical, &mapped_length, &bioc);
|
||||
if (ret || !bioc || mapped_length < sublen) {
|
||||
btrfs_put_bioc(bioc);
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
|
||||
if (!recover) {
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
refcount_set(&recover->refs, 1);
|
||||
recover->bbio = bbio;
|
||||
recover->bioc = bioc;
|
||||
recover->map_length = mapped_length;
|
||||
|
||||
BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
|
||||
|
||||
nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
|
||||
nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS);
|
||||
|
||||
for (mirror_index = 0; mirror_index < nmirrors;
|
||||
mirror_index++) {
|
||||
@@ -1348,17 +1347,17 @@ leave_nomem:
|
||||
sctx->fs_info->csum_size);
|
||||
|
||||
scrub_stripe_index_and_offset(logical,
|
||||
bbio->map_type,
|
||||
bbio->raid_map,
|
||||
bioc->map_type,
|
||||
bioc->raid_map,
|
||||
mapped_length,
|
||||
bbio->num_stripes -
|
||||
bbio->num_tgtdevs,
|
||||
bioc->num_stripes -
|
||||
bioc->num_tgtdevs,
|
||||
mirror_index,
|
||||
&stripe_index,
|
||||
&stripe_offset);
|
||||
spage->physical = bbio->stripes[stripe_index].physical +
|
||||
spage->physical = bioc->stripes[stripe_index].physical +
|
||||
stripe_offset;
|
||||
spage->dev = bbio->stripes[stripe_index].dev;
|
||||
spage->dev = bioc->stripes[stripe_index].dev;
|
||||
|
||||
BUG_ON(page_index >= original_sblock->page_count);
|
||||
spage->physical_for_dev_replace =
|
||||
@@ -1401,7 +1400,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
|
||||
bio->bi_end_io = scrub_bio_wait_endio;
|
||||
|
||||
mirror_num = spage->sblock->pagev[0]->mirror_num;
|
||||
ret = raid56_parity_recover(fs_info, bio, spage->recover->bbio,
|
||||
ret = raid56_parity_recover(bio, spage->recover->bioc,
|
||||
spage->recover->map_length,
|
||||
mirror_num, 0);
|
||||
if (ret)
|
||||
@@ -1423,7 +1422,7 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
|
||||
if (!first_page->dev->bdev)
|
||||
goto out;
|
||||
|
||||
bio = btrfs_io_bio_alloc(BIO_MAX_VECS);
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS);
|
||||
bio_set_dev(bio, first_page->dev->bdev);
|
||||
|
||||
for (page_num = 0; page_num < sblock->page_count; page_num++) {
|
||||
@@ -1480,7 +1479,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
WARN_ON(!spage->page);
|
||||
bio = btrfs_io_bio_alloc(1);
|
||||
bio = btrfs_bio_alloc(1);
|
||||
bio_set_dev(bio, spage->dev->bdev);
|
||||
|
||||
bio_add_page(bio, spage->page, fs_info->sectorsize, 0);
|
||||
@@ -1562,7 +1561,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
bio = btrfs_io_bio_alloc(1);
|
||||
bio = btrfs_bio_alloc(1);
|
||||
bio_set_dev(bio, spage_bad->dev->bdev);
|
||||
bio->bi_iter.bi_sector = spage_bad->physical >> 9;
|
||||
bio->bi_opf = REQ_OP_WRITE;
|
||||
@@ -1676,7 +1675,7 @@ again:
|
||||
sbio->dev = sctx->wr_tgtdev;
|
||||
bio = sbio->bio;
|
||||
if (!bio) {
|
||||
bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
|
||||
bio = btrfs_bio_alloc(sctx->pages_per_wr_bio);
|
||||
sbio->bio = bio;
|
||||
}
|
||||
|
||||
@@ -2102,7 +2101,7 @@ again:
|
||||
sbio->dev = spage->dev;
|
||||
bio = sbio->bio;
|
||||
if (!bio) {
|
||||
bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
|
||||
bio = btrfs_bio_alloc(sctx->pages_per_rd_bio);
|
||||
sbio->bio = bio;
|
||||
}
|
||||
|
||||
@@ -2203,7 +2202,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
|
||||
struct btrfs_fs_info *fs_info = sctx->fs_info;
|
||||
u64 length = sblock->page_count * PAGE_SIZE;
|
||||
u64 logical = sblock->pagev[0]->logical;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
struct bio *bio;
|
||||
struct btrfs_raid_bio *rbio;
|
||||
int ret;
|
||||
@@ -2211,27 +2210,27 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
|
||||
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
|
||||
&length, &bbio);
|
||||
if (ret || !bbio || !bbio->raid_map)
|
||||
goto bbio_out;
|
||||
&length, &bioc);
|
||||
if (ret || !bioc || !bioc->raid_map)
|
||||
goto bioc_out;
|
||||
|
||||
if (WARN_ON(!sctx->is_dev_replace ||
|
||||
!(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
|
||||
!(bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
|
||||
/*
|
||||
* We shouldn't be scrubbing a missing device. Even for dev
|
||||
* replace, we should only get here for RAID 5/6. We either
|
||||
* managed to mount something with no mirrors remaining or
|
||||
* there's a bug in scrub_remap_extent()/btrfs_map_block().
|
||||
*/
|
||||
goto bbio_out;
|
||||
goto bioc_out;
|
||||
}
|
||||
|
||||
bio = btrfs_io_bio_alloc(0);
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS);
|
||||
bio->bi_iter.bi_sector = logical >> 9;
|
||||
bio->bi_private = sblock;
|
||||
bio->bi_end_io = scrub_missing_raid56_end_io;
|
||||
|
||||
rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
|
||||
rbio = raid56_alloc_missing_rbio(bio, bioc, length);
|
||||
if (!rbio)
|
||||
goto rbio_out;
|
||||
|
||||
@@ -2249,9 +2248,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
|
||||
|
||||
rbio_out:
|
||||
bio_put(bio);
|
||||
bbio_out:
|
||||
bioc_out:
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
spin_lock(&sctx->stat_lock);
|
||||
sctx->stat.malloc_errors++;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
@@ -2826,7 +2825,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
|
||||
struct btrfs_fs_info *fs_info = sctx->fs_info;
|
||||
struct bio *bio;
|
||||
struct btrfs_raid_bio *rbio;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
u64 length;
|
||||
int ret;
|
||||
|
||||
@@ -2838,17 +2837,17 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
|
||||
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
|
||||
&length, &bbio);
|
||||
if (ret || !bbio || !bbio->raid_map)
|
||||
goto bbio_out;
|
||||
&length, &bioc);
|
||||
if (ret || !bioc || !bioc->raid_map)
|
||||
goto bioc_out;
|
||||
|
||||
bio = btrfs_io_bio_alloc(0);
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS);
|
||||
bio->bi_iter.bi_sector = sparity->logic_start >> 9;
|
||||
bio->bi_private = sparity;
|
||||
bio->bi_end_io = scrub_parity_bio_endio;
|
||||
|
||||
rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
|
||||
length, sparity->scrub_dev,
|
||||
rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, length,
|
||||
sparity->scrub_dev,
|
||||
sparity->dbitmap,
|
||||
sparity->nsectors);
|
||||
if (!rbio)
|
||||
@@ -2860,9 +2859,9 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
|
||||
|
||||
rbio_out:
|
||||
bio_put(bio);
|
||||
bbio_out:
|
||||
bioc_out:
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
|
||||
sparity->nsectors);
|
||||
spin_lock(&sctx->stat_lock);
|
||||
@@ -2901,7 +2900,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
|
||||
struct btrfs_root *root = fs_info->extent_root;
|
||||
struct btrfs_root *csum_root = fs_info->csum_root;
|
||||
struct btrfs_extent_item *extent;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
u64 flags;
|
||||
int ret;
|
||||
int slot;
|
||||
@@ -3044,22 +3043,22 @@ again:
|
||||
extent_len);
|
||||
|
||||
mapped_length = extent_len;
|
||||
bbio = NULL;
|
||||
bioc = NULL;
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
|
||||
extent_logical, &mapped_length, &bbio,
|
||||
extent_logical, &mapped_length, &bioc,
|
||||
0);
|
||||
if (!ret) {
|
||||
if (!bbio || mapped_length < extent_len)
|
||||
if (!bioc || mapped_length < extent_len)
|
||||
ret = -EIO;
|
||||
}
|
||||
if (ret) {
|
||||
btrfs_put_bbio(bbio);
|
||||
btrfs_put_bioc(bioc);
|
||||
goto out;
|
||||
}
|
||||
extent_physical = bbio->stripes[0].physical;
|
||||
extent_mirror_num = bbio->mirror_num;
|
||||
extent_dev = bbio->stripes[0].dev;
|
||||
btrfs_put_bbio(bbio);
|
||||
extent_physical = bioc->stripes[0].physical;
|
||||
extent_mirror_num = bioc->mirror_num;
|
||||
extent_dev = bioc->stripes[0].dev;
|
||||
btrfs_put_bioc(bioc);
|
||||
|
||||
ret = btrfs_lookup_csums_range(csum_root,
|
||||
extent_logical,
|
||||
@@ -3956,7 +3955,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
|
||||
int ret;
|
||||
struct btrfs_fs_info *fs_info = sctx->fs_info;
|
||||
|
||||
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
|
||||
if (BTRFS_FS_ERROR(fs_info))
|
||||
return -EROFS;
|
||||
|
||||
/* Seed devices of a new filesystem has their own generation. */
|
||||
@@ -4068,6 +4067,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
u64 end, struct btrfs_scrub_progress *progress,
|
||||
int readonly, int is_dev_replace)
|
||||
{
|
||||
struct btrfs_dev_lookup_args args = { .devid = devid };
|
||||
struct scrub_ctx *sctx;
|
||||
int ret;
|
||||
struct btrfs_device *dev;
|
||||
@@ -4115,7 +4115,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
goto out_free_ctx;
|
||||
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
|
||||
dev = btrfs_find_device(fs_info->fs_devices, &args);
|
||||
if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
|
||||
!is_dev_replace)) {
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
@@ -4288,11 +4288,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
|
||||
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
|
||||
struct btrfs_scrub_progress *progress)
|
||||
{
|
||||
struct btrfs_dev_lookup_args args = { .devid = devid };
|
||||
struct btrfs_device *dev;
|
||||
struct scrub_ctx *sctx = NULL;
|
||||
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
|
||||
dev = btrfs_find_device(fs_info->fs_devices, &args);
|
||||
if (dev)
|
||||
sctx = dev->scrub_ctx;
|
||||
if (sctx)
|
||||
@@ -4309,20 +4310,20 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
|
||||
int *extent_mirror_num)
|
||||
{
|
||||
u64 mapped_length;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
int ret;
|
||||
|
||||
mapped_length = extent_len;
|
||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
|
||||
&mapped_length, &bbio, 0);
|
||||
if (ret || !bbio || mapped_length < extent_len ||
|
||||
!bbio->stripes[0].dev->bdev) {
|
||||
btrfs_put_bbio(bbio);
|
||||
&mapped_length, &bioc, 0);
|
||||
if (ret || !bioc || mapped_length < extent_len ||
|
||||
!bioc->stripes[0].dev->bdev) {
|
||||
btrfs_put_bioc(bioc);
|
||||
return;
|
||||
}
|
||||
|
||||
*extent_physical = bbio->stripes[0].physical;
|
||||
*extent_mirror_num = bbio->mirror_num;
|
||||
*extent_dev = bbio->stripes[0].dev;
|
||||
btrfs_put_bbio(bbio);
|
||||
*extent_physical = bioc->stripes[0].physical;
|
||||
*extent_mirror_num = bioc->mirror_num;
|
||||
*extent_dev = bioc->stripes[0].dev;
|
||||
btrfs_put_bioc(bioc);
|
||||
}
|
||||
|
||||
@@ -84,6 +84,8 @@ struct send_ctx {
|
||||
u64 total_send_size;
|
||||
u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
|
||||
u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */
|
||||
/* Protocol version compatibility requested */
|
||||
u32 proto;
|
||||
|
||||
struct btrfs_root *send_root;
|
||||
struct btrfs_root *parent_root;
|
||||
@@ -312,6 +314,16 @@ static void inconsistent_snapshot_error(struct send_ctx *sctx,
|
||||
sctx->parent_root->root_key.objectid : 0));
|
||||
}
|
||||
|
||||
__maybe_unused
|
||||
static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
|
||||
{
|
||||
switch (sctx->proto) {
|
||||
case 1: return cmd < __BTRFS_SEND_C_MAX_V1;
|
||||
case 2: return cmd < __BTRFS_SEND_C_MAX_V2;
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
|
||||
static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
|
||||
|
||||
static struct waiting_dir_move *
|
||||
@@ -2720,19 +2732,12 @@ static int send_create_inode_if_needed(struct send_ctx *sctx)
|
||||
if (S_ISDIR(sctx->cur_inode_mode)) {
|
||||
ret = did_create_dir(sctx, sctx->cur_ino);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
return ret;
|
||||
else if (ret > 0)
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = send_create_inode(sctx, sctx->cur_ino);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
return send_create_inode(sctx, sctx->cur_ino);
|
||||
}
|
||||
|
||||
struct recorded_ref {
|
||||
@@ -7276,6 +7281,17 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
|
||||
sctx->flags = arg->flags;
|
||||
|
||||
if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
|
||||
if (arg->version > BTRFS_SEND_STREAM_VERSION) {
|
||||
ret = -EPROTO;
|
||||
goto out;
|
||||
}
|
||||
/* Zero means "use the highest version" */
|
||||
sctx->proto = arg->version ?: BTRFS_SEND_STREAM_VERSION;
|
||||
} else {
|
||||
sctx->proto = 1;
|
||||
}
|
||||
|
||||
sctx->send_filp = fget(arg->send_fd);
|
||||
if (!sctx->send_filp) {
|
||||
ret = -EBADF;
|
||||
|
||||
@@ -48,6 +48,7 @@ struct btrfs_tlv_header {
|
||||
enum btrfs_send_cmd {
|
||||
BTRFS_SEND_C_UNSPEC,
|
||||
|
||||
/* Version 1 */
|
||||
BTRFS_SEND_C_SUBVOL,
|
||||
BTRFS_SEND_C_SNAPSHOT,
|
||||
|
||||
@@ -76,6 +77,12 @@ enum btrfs_send_cmd {
|
||||
|
||||
BTRFS_SEND_C_END,
|
||||
BTRFS_SEND_C_UPDATE_EXTENT,
|
||||
__BTRFS_SEND_C_MAX_V1,
|
||||
|
||||
/* Version 2 */
|
||||
__BTRFS_SEND_C_MAX_V2,
|
||||
|
||||
/* End */
|
||||
__BTRFS_SEND_C_MAX,
|
||||
};
|
||||
#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
|
||||
|
||||
@@ -885,6 +885,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
|
||||
{
|
||||
struct reserve_ticket *ticket;
|
||||
u64 tickets_id = space_info->tickets_id;
|
||||
const bool aborted = BTRFS_FS_ERROR(fs_info);
|
||||
|
||||
trace_btrfs_fail_all_tickets(fs_info, space_info);
|
||||
|
||||
@@ -898,16 +899,19 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
|
||||
ticket = list_first_entry(&space_info->tickets,
|
||||
struct reserve_ticket, list);
|
||||
|
||||
if (ticket->steal &&
|
||||
if (!aborted && ticket->steal &&
|
||||
steal_from_global_rsv(fs_info, space_info, ticket))
|
||||
return true;
|
||||
|
||||
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
|
||||
if (!aborted && btrfs_test_opt(fs_info, ENOSPC_DEBUG))
|
||||
btrfs_info(fs_info, "failing ticket with %llu bytes",
|
||||
ticket->bytes);
|
||||
|
||||
remove_ticket(space_info, ticket);
|
||||
ticket->error = -ENOSPC;
|
||||
if (aborted)
|
||||
ticket->error = -EIO;
|
||||
else
|
||||
ticket->error = -ENOSPC;
|
||||
wake_up(&ticket->wait);
|
||||
|
||||
/*
|
||||
@@ -916,7 +920,8 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
|
||||
* here to see if we can make progress with the next ticket in
|
||||
* the list.
|
||||
*/
|
||||
btrfs_try_granting_tickets(fs_info, space_info);
|
||||
if (!aborted)
|
||||
btrfs_try_granting_tickets(fs_info, space_info);
|
||||
}
|
||||
return (tickets_id != space_info->tickets_id);
|
||||
}
|
||||
@@ -1172,6 +1177,10 @@ static void btrfs_async_reclaim_data_space(struct work_struct *work)
|
||||
spin_unlock(&space_info->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Something happened, fail everything and bail. */
|
||||
if (BTRFS_FS_ERROR(fs_info))
|
||||
goto aborted_fs;
|
||||
last_tickets_id = space_info->tickets_id;
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
@@ -1202,9 +1211,20 @@ static void btrfs_async_reclaim_data_space(struct work_struct *work)
|
||||
} else {
|
||||
flush_state = 0;
|
||||
}
|
||||
|
||||
/* Something happened, fail everything and bail. */
|
||||
if (BTRFS_FS_ERROR(fs_info))
|
||||
goto aborted_fs;
|
||||
|
||||
}
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
return;
|
||||
|
||||
aborted_fs:
|
||||
maybe_fail_all_tickets(fs_info, space_info);
|
||||
space_info->flush = 0;
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
|
||||
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)
|
||||
|
||||
@@ -63,11 +63,41 @@
|
||||
* This means a slightly higher tree locking latency.
|
||||
*/
|
||||
|
||||
/*
 * Fill @subpage_info with the layout of the packed subpage bitmap for the
 * given @sectorsize.
 *
 * Each sub-bitmap is PAGE_SIZE / sectorsize bits long.  The sub-bitmaps are
 * placed back to back starting at bit 0, in this order: uptodate, error,
 * dirty, writeback, ordered, checked.
 */
void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize)
{
	unsigned int nr_bits;

	ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize));

	nr_bits = PAGE_SIZE / sectorsize;
	subpage_info->bitmap_nr_bits = nr_bits;

	/* The n-th sub-bitmap starts n * nr_bits into the packed bitmap. */
	subpage_info->uptodate_offset = 0;
	subpage_info->error_offset = 1 * nr_bits;
	subpage_info->dirty_offset = 2 * nr_bits;
	subpage_info->writeback_offset = 3 * nr_bits;
	subpage_info->ordered_offset = 4 * nr_bits;
	subpage_info->checked_offset = 5 * nr_bits;

	subpage_info->total_nr_bits = 6 * nr_bits;
}
|
||||
|
||||
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, enum btrfs_subpage_type type)
|
||||
{
|
||||
struct btrfs_subpage *subpage = NULL;
|
||||
int ret;
|
||||
struct btrfs_subpage *subpage;
|
||||
|
||||
/*
|
||||
* We have cases like a dummy extent buffer page, which is not mapped
|
||||
@@ -75,13 +105,15 @@ int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
if (page->mapping)
|
||||
ASSERT(PageLocked(page));
|
||||
|
||||
/* Either not subpage, or the page already has private attached */
|
||||
if (fs_info->sectorsize == PAGE_SIZE || PagePrivate(page))
|
||||
return 0;
|
||||
|
||||
ret = btrfs_alloc_subpage(fs_info, &subpage, type);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
subpage = btrfs_alloc_subpage(fs_info, type);
|
||||
if (IS_ERR(subpage))
|
||||
return PTR_ERR(subpage);
|
||||
|
||||
attach_page_private(page, subpage);
|
||||
return 0;
|
||||
}
|
||||
@@ -100,24 +132,28 @@ void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
|
||||
btrfs_free_subpage(subpage);
|
||||
}
|
||||
|
||||
int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_subpage **ret,
|
||||
enum btrfs_subpage_type type)
|
||||
struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_subpage_type type)
|
||||
{
|
||||
if (fs_info->sectorsize == PAGE_SIZE)
|
||||
return 0;
|
||||
struct btrfs_subpage *ret;
|
||||
unsigned int real_size;
|
||||
|
||||
*ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS);
|
||||
if (!*ret)
|
||||
return -ENOMEM;
|
||||
spin_lock_init(&(*ret)->lock);
|
||||
ASSERT(fs_info->sectorsize < PAGE_SIZE);
|
||||
|
||||
real_size = struct_size(ret, bitmaps,
|
||||
BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits));
|
||||
ret = kzalloc(real_size, GFP_NOFS);
|
||||
if (!ret)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
spin_lock_init(&ret->lock);
|
||||
if (type == BTRFS_SUBPAGE_METADATA) {
|
||||
atomic_set(&(*ret)->eb_refs, 0);
|
||||
atomic_set(&ret->eb_refs, 0);
|
||||
} else {
|
||||
atomic_set(&(*ret)->readers, 0);
|
||||
atomic_set(&(*ret)->writers, 0);
|
||||
atomic_set(&ret->readers, 0);
|
||||
atomic_set(&ret->writers, 0);
|
||||
}
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_free_subpage(struct btrfs_subpage *subpage)
|
||||
@@ -222,8 +258,16 @@ static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len)
|
||||
u32 orig_len = *len;
|
||||
|
||||
*start = max_t(u64, page_offset(page), orig_start);
|
||||
*len = min_t(u64, page_offset(page) + PAGE_SIZE,
|
||||
orig_start + orig_len) - *start;
|
||||
/*
|
||||
* For certain call sites like btrfs_drop_pages(), we may have pages
|
||||
* beyond the target range. In that case, just set @len to 0, subpage
|
||||
* helpers can handle @len == 0 without any problem.
|
||||
*/
|
||||
if (page_offset(page) >= orig_start + orig_len)
|
||||
*len = 0;
|
||||
else
|
||||
*len = min_t(u64, page_offset(page) + PAGE_SIZE,
|
||||
orig_start + orig_len) - *start;
|
||||
}
|
||||
|
||||
void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
|
||||
@@ -248,6 +292,16 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
|
||||
|
||||
btrfs_subpage_assert(fs_info, page, start, len);
|
||||
|
||||
/*
|
||||
* We have call sites passing @lock_page into
|
||||
* extent_clear_unlock_delalloc() for compression path.
|
||||
*
|
||||
* This @locked_page is locked by plain lock_page(), thus its
|
||||
* subpage::writers is 0. Handle them in a special way.
|
||||
*/
|
||||
if (atomic_read(&subpage->writers) == 0)
|
||||
return true;
|
||||
|
||||
ASSERT(atomic_read(&subpage->writers) >= nbits);
|
||||
return atomic_sub_and_test(nbits, &subpage->writers);
|
||||
}
|
||||
@@ -289,37 +343,59 @@ void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
unlock_page(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert the [start, start + len) range into a u16 bitmap
|
||||
*
|
||||
* For example: if start == page_offset() + 16K, len = 16K, we get 0x00f0.
|
||||
*/
|
||||
static u16 btrfs_subpage_calc_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
static bool bitmap_test_range_all_set(unsigned long *addr, unsigned int start,
|
||||
unsigned int nbits)
|
||||
{
|
||||
const int bit_start = offset_in_page(start) >> fs_info->sectorsize_bits;
|
||||
const int nbits = len >> fs_info->sectorsize_bits;
|
||||
unsigned int found_zero;
|
||||
|
||||
btrfs_subpage_assert(fs_info, page, start, len);
|
||||
|
||||
/*
|
||||
* Here nbits can be 16, thus can go beyond u16 range. We make the
|
||||
* first left shift to be calculate in unsigned long (at least u32),
|
||||
* then truncate the result to u16.
|
||||
*/
|
||||
return (u16)(((1UL << nbits) - 1) << bit_start);
|
||||
found_zero = find_next_zero_bit(addr, start + nbits, start);
|
||||
if (found_zero == start + nbits)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Return true when no bit is set in the @nbits bits of @addr that begin at
 * bit index @start, false otherwise.
 */
static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned int start,
				       unsigned int nbits)
{
	const unsigned int range_end = start + nbits;
	/* The search is bounded by range_end; landing there means no hit. */
	const unsigned int first_set = find_next_bit(addr, range_end, start);

	return first_set == range_end;
}
|
||||
|
||||
/*
 * Compute the index into btrfs_subpage::bitmaps of the bit that tracks the
 * sector at @start for the @name sub-bitmap.
 *
 * btrfs_subpage_assert() is called on the range first.  The result is the
 * sector index of @start inside the page plus the @name##_offset stored in
 * fs_info->subpage_info.
 *
 * The temporary is named __start_bit so that it does not shadow the
 * "start_bit" variable callers assign the result to, and @fs_info is
 * parenthesized so any pointer expression can be passed.
 */
#define subpage_calc_start_bit(fs_info, page, name, start, len)		\
({									\
	unsigned int __start_bit;					\
									\
	btrfs_subpage_assert(fs_info, page, start, len);		\
	__start_bit = offset_in_page(start) >> (fs_info)->sectorsize_bits; \
	__start_bit += (fs_info)->subpage_info->name##_offset;		\
	__start_bit;							\
})
|
||||
|
||||
/*
 * Test whether every bit of the whole @name sub-bitmap of @subpage is set.
 *
 * The sub-bitmap starts at @name##_offset and is bitmap_nr_bits long, both
 * taken from fs_info->subpage_info.  Pointer arguments are parenthesized so
 * any pointer expression can be passed.
 */
#define subpage_test_bitmap_all_set(fs_info, subpage, name)		\
	bitmap_test_range_all_set((subpage)->bitmaps,			\
			(fs_info)->subpage_info->name##_offset,		\
			(fs_info)->subpage_info->bitmap_nr_bits)
|
||||
|
||||
/*
 * Test whether every bit of the whole @name sub-bitmap of @subpage is clear.
 *
 * The sub-bitmap starts at @name##_offset and is bitmap_nr_bits long, both
 * taken from fs_info->subpage_info.  Pointer arguments are parenthesized so
 * any pointer expression can be passed.
 */
#define subpage_test_bitmap_all_zero(fs_info, subpage, name)		\
	bitmap_test_range_all_zero((subpage)->bitmaps,			\
			(fs_info)->subpage_info->name##_offset,		\
			(fs_info)->subpage_info->bitmap_nr_bits)
|
||||
|
||||
void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
uptodate, start, len);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->uptodate_bitmap |= tmp;
|
||||
if (subpage->uptodate_bitmap == U16_MAX)
|
||||
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate))
|
||||
SetPageUptodate(page);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
@@ -328,11 +404,12 @@ void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
uptodate, start, len);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->uptodate_bitmap &= ~tmp;
|
||||
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
ClearPageUptodate(page);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
@@ -341,11 +418,12 @@ void btrfs_subpage_set_error(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
error, start, len);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->error_bitmap |= tmp;
|
||||
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
SetPageError(page);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
@@ -354,12 +432,13 @@ void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
error, start, len);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->error_bitmap &= ~tmp;
|
||||
if (subpage->error_bitmap == 0)
|
||||
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
if (subpage_test_bitmap_all_zero(fs_info, subpage, error))
|
||||
ClearPageError(page);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
@@ -368,11 +447,12 @@ void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
dirty, start, len);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->dirty_bitmap |= tmp;
|
||||
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
set_page_dirty(page);
|
||||
}
|
||||
@@ -391,13 +471,14 @@ bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
dirty, start, len);
|
||||
unsigned long flags;
|
||||
bool last = false;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->dirty_bitmap &= ~tmp;
|
||||
if (subpage->dirty_bitmap == 0)
|
||||
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty))
|
||||
last = true;
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
return last;
|
||||
@@ -417,11 +498,12 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
writeback, start, len);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->writeback_bitmap |= tmp;
|
||||
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
set_page_writeback(page);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
@@ -430,12 +512,13 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
writeback, start, len);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->writeback_bitmap &= ~tmp;
|
||||
if (subpage->writeback_bitmap == 0) {
|
||||
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) {
|
||||
ASSERT(PageWriteback(page));
|
||||
end_page_writeback(page);
|
||||
}
|
||||
@@ -446,11 +529,12 @@ void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
ordered, start, len);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->ordered_bitmap |= tmp;
|
||||
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
SetPageOrdered(page);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
@@ -459,15 +543,46 @@ void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
|
||||
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
|
||||
ordered, start, len);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
subpage->ordered_bitmap &= ~tmp;
|
||||
if (subpage->ordered_bitmap == 0)
|
||||
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
|
||||
if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered))
|
||||
ClearPageOrdered(page);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
|
||||
/*
 * Set the "checked" bits for the sectors of @page covered by
 * [@start, @start + @len).
 *
 * The page-level Checked flag is raised only once the whole "checked"
 * sub-bitmap of the page is set.  Bitmap and flag are updated under
 * subpage->lock with interrupts disabled.
 */
void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
		struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int first_bit = subpage_calc_start_bit(fs_info, page,
							checked, start, len);
	unsigned long irq_flags;

	spin_lock_irqsave(&subpage->lock, irq_flags);

	bitmap_set(subpage->bitmaps, first_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, subpage, checked))
		SetPageChecked(page);

	spin_unlock_irqrestore(&subpage->lock, irq_flags);
}
|
||||
|
||||
/*
 * Clear the "checked" bits for the sectors of @page covered by
 * [@start, @start + @len).
 *
 * The page-level Checked flag is cleared unconditionally, whatever the state
 * of the remaining sectors.  Bitmap and flag are updated under subpage->lock
 * with interrupts disabled.
 */
void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
		struct page *page, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	unsigned int first_bit = subpage_calc_start_bit(fs_info, page,
							checked, start, len);
	unsigned long irq_flags;

	spin_lock_irqsave(&subpage->lock, irq_flags);

	bitmap_clear(subpage->bitmaps, first_bit, len >> fs_info->sectorsize_bits);
	ClearPageChecked(page);

	spin_unlock_irqrestore(&subpage->lock, irq_flags);
}
|
||||
|
||||
/*
|
||||
* Unlike set/clear which is dependent on each page status, for test all bits
|
||||
* are tested in the same way.
|
||||
@@ -477,12 +592,14 @@ bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
|
||||
struct page *page, u64 start, u32 len) \
|
||||
{ \
|
||||
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; \
|
||||
const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); \
|
||||
unsigned int start_bit = subpage_calc_start_bit(fs_info, page, \
|
||||
name, start, len); \
|
||||
unsigned long flags; \
|
||||
bool ret; \
|
||||
\
|
||||
spin_lock_irqsave(&subpage->lock, flags); \
|
||||
ret = ((subpage->name##_bitmap & tmp) == tmp); \
|
||||
ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \
|
||||
len >> fs_info->sectorsize_bits); \
|
||||
spin_unlock_irqrestore(&subpage->lock, flags); \
|
||||
return ret; \
|
||||
}
|
||||
@@ -491,6 +608,7 @@ IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error);
|
||||
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
|
||||
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
|
||||
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
|
||||
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
|
||||
|
||||
/*
|
||||
* Note that, in selftests (extent-io-tests), we can have empty fs_info passed
|
||||
@@ -561,6 +679,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
|
||||
PageWriteback);
|
||||
IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered,
|
||||
PageOrdered);
|
||||
IMPLEMENT_BTRFS_PAGE_OPS(checked, SetPageChecked, ClearPageChecked, PageChecked);
|
||||
|
||||
/*
|
||||
* Make sure not only the page dirty bit is cleared, but also subpage dirty bit
|
||||
@@ -579,5 +698,48 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
|
||||
return;
|
||||
|
||||
ASSERT(PagePrivate(page) && page->private);
|
||||
ASSERT(subpage->dirty_bitmap == 0);
|
||||
ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty));
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle different locked pages with different page sizes:
|
||||
*
|
||||
* - Page locked by plain lock_page()
|
||||
* It should not have any subpage::writers count.
|
||||
* Can be unlocked by unlock_page().
|
||||
* This is the most common locked page for __extent_writepage() called
|
||||
* inside extent_write_cache_pages() or extent_write_full_page().
|
||||
* Rarer cases include the @locked_page from extent_write_locked_range().
|
||||
*
|
||||
* - Page locked by lock_delalloc_pages()
|
||||
* There is only one caller, all pages except @locked_page for
|
||||
* extent_write_locked_range().
|
||||
* In this case, we have to call subpage helper to handle the case.
|
||||
*/
|
||||
void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
|
||||
u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage;
|
||||
|
||||
ASSERT(PageLocked(page));
|
||||
/* For regular page size case, we just unlock the page */
|
||||
if (fs_info->sectorsize == PAGE_SIZE)
|
||||
return unlock_page(page);
|
||||
|
||||
ASSERT(PagePrivate(page) && page->private);
|
||||
subpage = (struct btrfs_subpage *)page->private;
|
||||
|
||||
/*
|
||||
* For subpage case, there are two types of locked page. With or
|
||||
* without writers number.
|
||||
*
|
||||
* Since we own the page lock, no one else could touch subpage::writers
|
||||
* and we are safe to do several atomic operations without spinlock.
|
||||
*/
|
||||
if (atomic_read(&subpage->writers))
|
||||
/* No writers, locked by plain lock_page() */
|
||||
return unlock_page(page);
|
||||
|
||||
/* Have writers, use proper subpage helper to end it */
|
||||
btrfs_page_end_writer_lock(fs_info, page, start, len);
|
||||
}
|
||||
|
||||
@@ -6,10 +6,38 @@
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
/*
|
||||
* Maximum page size we support is 64K, minimum sector size is 4K, u16 bitmap
|
||||
* is sufficient. Regular bitmap_* is not used due to size reasons.
|
||||
* Extra info for subpage bitmap.
|
||||
*
|
||||
* For subpage we pack all uptodate/error/dirty/writeback/ordered/checked bitmaps into
|
||||
* one larger bitmap.
|
||||
*
|
||||
* This structure records how they are organized in the bitmap:
|
||||
*
|
||||
* /- uptodate_offset /- error_offset /- dirty_offset
|
||||
* | | |
|
||||
* v v v
|
||||
* |u|u|u|u|........|u|u|e|e|.......|e|e| ... |o|o|
|
||||
* |<- bitmap_nr_bits ->|
|
||||
* |<--------------- total_nr_bits ---------------->|
|
||||
*/
|
||||
#define BTRFS_SUBPAGE_BITMAP_SIZE 16
|
||||
struct btrfs_subpage_info {
|
||||
/* Number of bits for each bitmap */
|
||||
unsigned int bitmap_nr_bits;
|
||||
|
||||
/* Total number of bits for the whole bitmap */
|
||||
unsigned int total_nr_bits;
|
||||
|
||||
/*
|
||||
* *_offset indicates where the bitmap starts, the length is always
|
||||
* @bitmap_nr_bits, which is calculated from PAGE_SIZE / sectorsize.
|
||||
*/
|
||||
unsigned int uptodate_offset;
|
||||
unsigned int error_offset;
|
||||
unsigned int dirty_offset;
|
||||
unsigned int writeback_offset;
|
||||
unsigned int ordered_offset;
|
||||
unsigned int checked_offset;
|
||||
};
|
||||
|
||||
/*
|
||||
* Structure to trace status of each sector inside a page, attached to
|
||||
@@ -18,10 +46,6 @@
|
||||
struct btrfs_subpage {
|
||||
/* Common members for both data and metadata pages */
|
||||
spinlock_t lock;
|
||||
u16 uptodate_bitmap;
|
||||
u16 error_bitmap;
|
||||
u16 dirty_bitmap;
|
||||
u16 writeback_bitmap;
|
||||
/*
|
||||
* Both data and metadata needs to track how many readers are for the
|
||||
* page.
|
||||
@@ -38,14 +62,11 @@ struct btrfs_subpage {
|
||||
* manages whether the subpage can be detached.
|
||||
*/
|
||||
atomic_t eb_refs;
|
||||
/* Structures only used by data */
|
||||
struct {
|
||||
atomic_t writers;
|
||||
|
||||
/* Tracke pending ordered extent in this sector */
|
||||
u16 ordered_bitmap;
|
||||
};
|
||||
/* Structures only used by data */
|
||||
atomic_t writers;
|
||||
};
|
||||
unsigned long bitmaps[];
|
||||
};
|
||||
|
||||
enum btrfs_subpage_type {
|
||||
@@ -53,15 +74,15 @@ enum btrfs_subpage_type {
|
||||
BTRFS_SUBPAGE_DATA,
|
||||
};
|
||||
|
||||
void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize);
|
||||
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, enum btrfs_subpage_type type);
|
||||
void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page);
|
||||
|
||||
/* Allocate additional data where page represents more than one sector */
|
||||
int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_subpage **ret,
|
||||
enum btrfs_subpage_type type);
|
||||
struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_subpage_type type);
|
||||
void btrfs_free_subpage(struct btrfs_subpage *subpage);
|
||||
|
||||
void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
|
||||
@@ -122,11 +143,14 @@ DECLARE_BTRFS_SUBPAGE_OPS(error);
|
||||
DECLARE_BTRFS_SUBPAGE_OPS(dirty);
|
||||
DECLARE_BTRFS_SUBPAGE_OPS(writeback);
|
||||
DECLARE_BTRFS_SUBPAGE_OPS(ordered);
|
||||
DECLARE_BTRFS_SUBPAGE_OPS(checked);
|
||||
|
||||
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page, u64 start, u32 len);
|
||||
|
||||
void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
|
||||
struct page *page);
|
||||
void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
|
||||
u64 start, u32 len);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1705,7 +1705,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
|
||||
goto error_close_devices;
|
||||
}
|
||||
|
||||
bdev = fs_devices->latest_bdev;
|
||||
bdev = fs_devices->latest_dev->bdev;
|
||||
s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
|
||||
fs_info);
|
||||
if (IS_ERR(s)) {
|
||||
@@ -2006,7 +2006,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
|
||||
if (ret)
|
||||
goto restore;
|
||||
} else {
|
||||
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
|
||||
if (BTRFS_FS_ERROR(fs_info)) {
|
||||
btrfs_err(fs_info,
|
||||
"Remounting read-write after error is not allowed");
|
||||
ret = -EINVAL;
|
||||
@@ -2463,30 +2463,16 @@ static int btrfs_unfreeze(struct super_block *sb)
|
||||
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
|
||||
struct btrfs_device *dev, *first_dev = NULL;
|
||||
|
||||
/*
|
||||
* Lightweight locking of the devices. We should not need
|
||||
* device_list_mutex here as we only read the device data and the list
|
||||
* is protected by RCU. Even if a device is deleted during the list
|
||||
* traversals, we'll get valid data, the freeing callback will wait at
|
||||
* least until the rcu_read_unlock.
|
||||
* There should be always a valid pointer in latest_dev, it may be stale
|
||||
* for a short moment in case it's being deleted but still valid until
|
||||
* the end of RCU grace period.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
|
||||
if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
|
||||
continue;
|
||||
if (!dev->name)
|
||||
continue;
|
||||
if (!first_dev || dev->devid < first_dev->devid)
|
||||
first_dev = dev;
|
||||
}
|
||||
|
||||
if (first_dev)
|
||||
seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
|
||||
else
|
||||
WARN_ON(1);
|
||||
seq_escape(m, rcu_str_deref(fs_info->fs_devices->latest_dev->name), " \t\n\\");
|
||||
rcu_read_unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -177,7 +177,7 @@ static ssize_t btrfs_feature_attr_show(struct kobject *kobj,
|
||||
} else
|
||||
val = can_modify_feature(fa);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return sysfs_emit(buf, "%d\n", val);
|
||||
}
|
||||
|
||||
static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
|
||||
@@ -330,7 +330,7 @@ static const struct attribute_group btrfs_feature_attr_group = {
|
||||
static ssize_t rmdir_subvol_show(struct kobject *kobj,
|
||||
struct kobj_attribute *ka, char *buf)
|
||||
{
|
||||
return scnprintf(buf, PAGE_SIZE, "0\n");
|
||||
return sysfs_emit(buf, "0\n");
|
||||
}
|
||||
BTRFS_ATTR(static_feature, rmdir_subvol, rmdir_subvol_show);
|
||||
|
||||
@@ -345,12 +345,12 @@ static ssize_t supported_checksums_show(struct kobject *kobj,
|
||||
* This "trick" only works as long as 'enum btrfs_csum_type' has
|
||||
* no holes in it
|
||||
*/
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
|
||||
(i == 0 ? "" : " "), btrfs_super_csum_name(i));
|
||||
ret += sysfs_emit_at(buf, ret, "%s%s", (i == 0 ? "" : " "),
|
||||
btrfs_super_csum_name(i));
|
||||
|
||||
}
|
||||
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
|
||||
ret += sysfs_emit_at(buf, ret, "\n");
|
||||
return ret;
|
||||
}
|
||||
BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show);
|
||||
@@ -358,7 +358,7 @@ BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show);
|
||||
static ssize_t send_stream_version_show(struct kobject *kobj,
|
||||
struct kobj_attribute *ka, char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", BTRFS_SEND_STREAM_VERSION);
|
||||
return sysfs_emit(buf, "%d\n", BTRFS_SEND_STREAM_VERSION);
|
||||
}
|
||||
BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show);
|
||||
|
||||
@@ -378,9 +378,8 @@ static ssize_t supported_rescue_options_show(struct kobject *kobj,
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(rescue_opts); i++)
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
|
||||
(i ? " " : ""), rescue_opts[i]);
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
|
||||
ret += sysfs_emit_at(buf, ret, "%s%s", (i ? " " : ""), rescue_opts[i]);
|
||||
ret += sysfs_emit_at(buf, ret, "\n");
|
||||
return ret;
|
||||
}
|
||||
BTRFS_ATTR(static_feature, supported_rescue_options,
|
||||
@@ -394,10 +393,10 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
|
||||
|
||||
/* 4K sector size is also supported with 64K page size */
|
||||
if (PAGE_SIZE == SZ_64K)
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%u ", SZ_4K);
|
||||
ret += sysfs_emit_at(buf, ret, "%u ", SZ_4K);
|
||||
|
||||
/* Only sectorsize == PAGE_SIZE is now supported */
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%lu\n", PAGE_SIZE);
|
||||
ret += sysfs_emit_at(buf, ret, "%lu\n", PAGE_SIZE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -437,7 +436,7 @@ static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
return sysfs_emit(buf, "%lld\n",
|
||||
atomic64_read(&fs_info->discard_ctl.discardable_bytes));
|
||||
}
|
||||
BTRFS_ATTR(discard, discardable_bytes, btrfs_discardable_bytes_show);
|
||||
@@ -448,7 +447,7 @@ static ssize_t btrfs_discardable_extents_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n",
|
||||
return sysfs_emit(buf, "%d\n",
|
||||
atomic_read(&fs_info->discard_ctl.discardable_extents));
|
||||
}
|
||||
BTRFS_ATTR(discard, discardable_extents, btrfs_discardable_extents_show);
|
||||
@@ -459,8 +458,8 @@ static ssize_t btrfs_discard_bitmap_bytes_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
fs_info->discard_ctl.discard_bitmap_bytes);
|
||||
return sysfs_emit(buf, "%llu\n",
|
||||
fs_info->discard_ctl.discard_bitmap_bytes);
|
||||
}
|
||||
BTRFS_ATTR(discard, discard_bitmap_bytes, btrfs_discard_bitmap_bytes_show);
|
||||
|
||||
@@ -470,7 +469,7 @@ static ssize_t btrfs_discard_bytes_saved_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
return sysfs_emit(buf, "%lld\n",
|
||||
atomic64_read(&fs_info->discard_ctl.discard_bytes_saved));
|
||||
}
|
||||
BTRFS_ATTR(discard, discard_bytes_saved, btrfs_discard_bytes_saved_show);
|
||||
@@ -481,8 +480,8 @@ static ssize_t btrfs_discard_extent_bytes_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
fs_info->discard_ctl.discard_extent_bytes);
|
||||
return sysfs_emit(buf, "%llu\n",
|
||||
fs_info->discard_ctl.discard_extent_bytes);
|
||||
}
|
||||
BTRFS_ATTR(discard, discard_extent_bytes, btrfs_discard_extent_bytes_show);
|
||||
|
||||
@@ -492,8 +491,8 @@ static ssize_t btrfs_discard_iops_limit_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n",
|
||||
READ_ONCE(fs_info->discard_ctl.iops_limit));
|
||||
return sysfs_emit(buf, "%u\n",
|
||||
READ_ONCE(fs_info->discard_ctl.iops_limit));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_discard_iops_limit_store(struct kobject *kobj,
|
||||
@@ -523,8 +522,8 @@ static ssize_t btrfs_discard_kbps_limit_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n",
|
||||
READ_ONCE(fs_info->discard_ctl.kbps_limit));
|
||||
return sysfs_emit(buf, "%u\n",
|
||||
READ_ONCE(fs_info->discard_ctl.kbps_limit));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_discard_kbps_limit_store(struct kobject *kobj,
|
||||
@@ -553,8 +552,8 @@ static ssize_t btrfs_discard_max_discard_size_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
READ_ONCE(fs_info->discard_ctl.max_discard_size));
|
||||
return sysfs_emit(buf, "%llu\n",
|
||||
READ_ONCE(fs_info->discard_ctl.max_discard_size));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_discard_max_discard_size_store(struct kobject *kobj,
|
||||
@@ -627,7 +626,7 @@ static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
|
||||
val = *value_ptr;
|
||||
if (lock)
|
||||
spin_unlock(lock);
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
|
||||
return sysfs_emit(buf, "%llu\n", val);
|
||||
}
|
||||
|
||||
static ssize_t global_rsv_size_show(struct kobject *kobj,
|
||||
@@ -673,7 +672,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
|
||||
val += block_group->used;
|
||||
}
|
||||
up_read(&sinfo->groups_sem);
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
|
||||
return sysfs_emit(buf, "%llu\n", val);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -771,7 +770,7 @@ static ssize_t btrfs_label_show(struct kobject *kobj,
|
||||
ssize_t ret;
|
||||
|
||||
spin_lock(&fs_info->super_lock);
|
||||
ret = scnprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
|
||||
ret = sysfs_emit(buf, label[0] ? "%s\n" : "%s", label);
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
|
||||
return ret;
|
||||
@@ -819,7 +818,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
|
||||
return sysfs_emit(buf, "%u\n", fs_info->super_copy->nodesize);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
|
||||
@@ -829,8 +828,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n",
|
||||
fs_info->super_copy->sectorsize);
|
||||
return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
|
||||
@@ -840,7 +838,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize);
|
||||
return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
|
||||
@@ -852,7 +850,7 @@ static ssize_t quota_override_show(struct kobject *kobj,
|
||||
int quota_override;
|
||||
|
||||
quota_override = test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags);
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", quota_override);
|
||||
return sysfs_emit(buf, "%d\n", quota_override);
|
||||
}
|
||||
|
||||
static ssize_t quota_override_store(struct kobject *kobj,
|
||||
@@ -890,8 +888,7 @@ static ssize_t btrfs_metadata_uuid_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%pU\n",
|
||||
fs_info->fs_devices->metadata_uuid);
|
||||
return sysfs_emit(buf, "%pU\n", fs_info->fs_devices->metadata_uuid);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(, metadata_uuid, btrfs_metadata_uuid_show);
|
||||
@@ -902,9 +899,9 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%s (%s)\n",
|
||||
btrfs_super_csum_name(csum_type),
|
||||
crypto_shash_driver_name(fs_info->csum_shash));
|
||||
return sysfs_emit(buf, "%s (%s)\n",
|
||||
btrfs_super_csum_name(csum_type),
|
||||
crypto_shash_driver_name(fs_info->csum_shash));
|
||||
}
|
||||
|
||||
BTRFS_ATTR(, checksum, btrfs_checksum_show);
|
||||
@@ -941,7 +938,7 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
|
||||
str = "UNKNOWN\n";
|
||||
break;
|
||||
}
|
||||
return scnprintf(buf, PAGE_SIZE, "%s", str);
|
||||
return sysfs_emit(buf, "%s", str);
|
||||
}
|
||||
BTRFS_ATTR(, exclusive_operation, btrfs_exclusive_operation_show);
|
||||
|
||||
@@ -950,7 +947,7 @@ static ssize_t btrfs_generation_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n", fs_info->generation);
|
||||
return sysfs_emit(buf, "%llu\n", fs_info->generation);
|
||||
}
|
||||
BTRFS_ATTR(, generation, btrfs_generation_show);
|
||||
|
||||
@@ -1028,8 +1025,7 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
ssize_t ret;
|
||||
|
||||
ret = scnprintf(buf, PAGE_SIZE, "%d\n",
|
||||
READ_ONCE(fs_info->bg_reclaim_threshold));
|
||||
ret = sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1471,7 +1467,7 @@ static ssize_t btrfs_devinfo_in_fs_metadata_show(struct kobject *kobj,
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return sysfs_emit(buf, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, in_fs_metadata, btrfs_devinfo_in_fs_metadata_show);
|
||||
|
||||
@@ -1484,7 +1480,7 @@ static ssize_t btrfs_devinfo_missing_show(struct kobject *kobj,
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return sysfs_emit(buf, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, missing, btrfs_devinfo_missing_show);
|
||||
|
||||
@@ -1498,7 +1494,7 @@ static ssize_t btrfs_devinfo_replace_target_show(struct kobject *kobj,
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return sysfs_emit(buf, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, replace_target, btrfs_devinfo_replace_target_show);
|
||||
|
||||
@@ -1509,8 +1505,7 @@ static ssize_t btrfs_devinfo_scrub_speed_max_show(struct kobject *kobj,
|
||||
struct btrfs_device *device = container_of(kobj, struct btrfs_device,
|
||||
devid_kobj);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
READ_ONCE(device->scrub_speed_max));
|
||||
return sysfs_emit(buf, "%llu\n", READ_ONCE(device->scrub_speed_max));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_devinfo_scrub_speed_max_store(struct kobject *kobj,
|
||||
@@ -1538,7 +1533,7 @@ static ssize_t btrfs_devinfo_writeable_show(struct kobject *kobj,
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return sysfs_emit(buf, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, writeable, btrfs_devinfo_writeable_show);
|
||||
|
||||
@@ -1549,14 +1544,14 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj,
|
||||
devid_kobj);
|
||||
|
||||
if (!device->dev_stats_valid)
|
||||
return scnprintf(buf, PAGE_SIZE, "invalid\n");
|
||||
return sysfs_emit(buf, "invalid\n");
|
||||
|
||||
/*
|
||||
* Print all at once so we get a snapshot of all values from the same
|
||||
* time. Keep them in sync and in order of definition of
|
||||
* btrfs_dev_stat_values.
|
||||
*/
|
||||
return scnprintf(buf, PAGE_SIZE,
|
||||
return sysfs_emit(buf,
|
||||
"write_errs %d\n"
|
||||
"read_errs %d\n"
|
||||
"flush_errs %d\n"
|
||||
|
||||
@@ -60,7 +60,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
|
||||
key.type = BTRFS_EXTENT_CSUM_KEY;
|
||||
key.offset = 0;
|
||||
|
||||
setup_items_for_insert(root, path, &key, &value_len, 1);
|
||||
btrfs_setup_item_for_insert(root, path, &key, value_len);
|
||||
item = btrfs_item_nr(0);
|
||||
write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0),
|
||||
value_len);
|
||||
|
||||
@@ -112,7 +112,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
*/
|
||||
set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
|
||||
start = 0;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
@@ -143,7 +143,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
}
|
||||
set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
|
||||
start = test_start;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
@@ -177,14 +177,14 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
goto out_bits;
|
||||
}
|
||||
start = test_start;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (found) {
|
||||
test_err("found range when we shouldn't have");
|
||||
goto out_bits;
|
||||
}
|
||||
if (end != (u64)-1) {
|
||||
if (end != test_start + PAGE_SIZE - 1) {
|
||||
test_err("did not return the proper end offset");
|
||||
goto out_bits;
|
||||
}
|
||||
@@ -198,7 +198,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
*/
|
||||
set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
|
||||
start = test_start;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
@@ -233,7 +233,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
/* We unlocked it in the previous test */
|
||||
lock_page(locked_page);
|
||||
start = test_start;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
/*
|
||||
* Currently if we fail to find dirty pages in the delalloc range we
|
||||
* will adjust max_bytes down to PAGE_SIZE and then re-search. If
|
||||
|
||||
@@ -33,7 +33,7 @@ static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
|
||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||
key.offset = start;
|
||||
|
||||
setup_items_for_insert(root, &path, &key, &value_len, 1);
|
||||
btrfs_setup_item_for_insert(root, &path, &key, value_len);
|
||||
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
||||
btrfs_set_file_extent_generation(leaf, fi, 1);
|
||||
btrfs_set_file_extent_type(leaf, fi, type);
|
||||
@@ -63,7 +63,7 @@ static void insert_inode_item_key(struct btrfs_root *root)
|
||||
key.type = BTRFS_INODE_ITEM_KEY;
|
||||
key.offset = 0;
|
||||
|
||||
setup_items_for_insert(root, &path, &key, &value_len, 1);
|
||||
btrfs_setup_item_for_insert(root, &path, &key, value_len);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -283,7 +283,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
loop:
|
||||
/* The file system has been taken offline. No new transactions. */
|
||||
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
|
||||
if (BTRFS_FS_ERROR(fs_info)) {
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
return -EROFS;
|
||||
}
|
||||
@@ -331,7 +331,7 @@ loop:
|
||||
*/
|
||||
kfree(cur_trans);
|
||||
goto loop;
|
||||
} else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
|
||||
} else if (BTRFS_FS_ERROR(fs_info)) {
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
kfree(cur_trans);
|
||||
return -EROFS;
|
||||
@@ -579,7 +579,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
|
||||
bool do_chunk_alloc = false;
|
||||
int ret;
|
||||
|
||||
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
|
||||
if (BTRFS_FS_ERROR(fs_info))
|
||||
return ERR_PTR(-EROFS);
|
||||
|
||||
if (current->journal_info) {
|
||||
@@ -991,8 +991,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
if (throttle)
|
||||
btrfs_run_delayed_iputs(info);
|
||||
|
||||
if (TRANS_ABORTED(trans) ||
|
||||
test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
|
||||
if (TRANS_ABORTED(trans) || BTRFS_FS_ERROR(info)) {
|
||||
wake_up_process(info->transaction_kthread);
|
||||
if (TRANS_ABORTED(trans))
|
||||
err = trans->aborted;
|
||||
@@ -2155,7 +2154,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
* abort to prevent writing a new superblock that reflects a
|
||||
* corrupt state (pointing to trees with unwritten nodes/leafs).
|
||||
*/
|
||||
if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) {
|
||||
if (BTRFS_FS_ERROR(fs_info)) {
|
||||
ret = -EROFS;
|
||||
goto cleanup_transaction;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -17,6 +17,8 @@ struct btrfs_log_ctx {
|
||||
int log_transid;
|
||||
bool log_new_dentries;
|
||||
bool logging_new_name;
|
||||
/* Tracks the last logged dir item/index key offset. */
|
||||
u64 last_dir_item_offset;
|
||||
struct inode *inode;
|
||||
struct list_head list;
|
||||
/* Only used for fast fsyncs. */
|
||||
@@ -68,14 +70,14 @@ int btrfs_recover_log_trees(struct btrfs_root *tree_root);
|
||||
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
|
||||
struct dentry *dentry,
|
||||
struct btrfs_log_ctx *ctx);
|
||||
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_inode *dir, u64 index);
|
||||
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_inode *inode, u64 dirid);
|
||||
void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_inode *dir, u64 index);
|
||||
void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_inode *inode, u64 dirid);
|
||||
void btrfs_end_log_trans(struct btrfs_root *root);
|
||||
void btrfs_pin_log_trans(struct btrfs_root *root);
|
||||
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -236,17 +236,40 @@ struct btrfs_fs_devices {
|
||||
bool fsid_change;
|
||||
struct list_head fs_list;
|
||||
|
||||
/*
|
||||
* Number of devices under this fsid including missing and
|
||||
* replace-target device and excludes seed devices.
|
||||
*/
|
||||
u64 num_devices;
|
||||
|
||||
/*
|
||||
* The number of devices that successfully opened, including
|
||||
* replace-target, excludes seed devices.
|
||||
*/
|
||||
u64 open_devices;
|
||||
|
||||
/* The number of devices that are under the chunk allocation list. */
|
||||
u64 rw_devices;
|
||||
|
||||
/* Count of missing devices under this fsid excluding seed device. */
|
||||
u64 missing_devices;
|
||||
u64 total_rw_bytes;
|
||||
|
||||
/*
|
||||
* Count of devices from btrfs_super_block::num_devices for this fsid,
|
||||
* which includes the seed device, excludes the transient replace-target
|
||||
* device.
|
||||
*/
|
||||
u64 total_devices;
|
||||
|
||||
/* Highest generation number of seen devices */
|
||||
u64 latest_generation;
|
||||
|
||||
struct block_device *latest_bdev;
|
||||
/*
|
||||
* The mount device or a device with highest generation after removal
|
||||
* or replace.
|
||||
*/
|
||||
struct btrfs_device *latest_dev;
|
||||
|
||||
/* all of the devices in the FS, protected by a mutex
|
||||
* so we can safely walk it to write out the supers without
|
||||
@@ -300,48 +323,62 @@ struct btrfs_fs_devices {
|
||||
/ sizeof(struct btrfs_stripe) + 1)
|
||||
|
||||
/*
|
||||
* we need the mirror number and stripe index to be passed around
|
||||
* the call chain while we are processing end_io (especially errors).
|
||||
* Really, what we need is a btrfs_bio structure that has this info
|
||||
* and is properly sized with its stripe array, but we're not there
|
||||
* quite yet. We have our own btrfs bioset, and all of the bios
|
||||
* we allocate are actually btrfs_io_bios. We'll cram as much of
|
||||
* struct btrfs_bio as we can into this over time.
|
||||
* Additional info to pass along bio.
|
||||
*
|
||||
* Mostly for btrfs specific features like csum and mirror_num.
|
||||
*/
|
||||
struct btrfs_io_bio {
|
||||
struct btrfs_bio {
|
||||
unsigned int mirror_num;
|
||||
|
||||
/* @device is for stripe IO submission. */
|
||||
struct btrfs_device *device;
|
||||
u64 logical;
|
||||
u8 *csum;
|
||||
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
|
||||
struct bvec_iter iter;
|
||||
|
||||
/*
|
||||
* This member must come last, bio_alloc_bioset will allocate enough
|
||||
* bytes for entire btrfs_io_bio but relies on bio being last.
|
||||
* bytes for entire btrfs_bio but relies on bio being last.
|
||||
*/
|
||||
struct bio bio;
|
||||
};
|
||||
|
||||
static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
|
||||
static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
|
||||
{
|
||||
return container_of(bio, struct btrfs_io_bio, bio);
|
||||
return container_of(bio, struct btrfs_bio, bio);
|
||||
}
|
||||
|
||||
static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio)
|
||||
static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
|
||||
{
|
||||
if (io_bio->csum != io_bio->csum_inline) {
|
||||
kfree(io_bio->csum);
|
||||
io_bio->csum = NULL;
|
||||
if (bbio->csum != bbio->csum_inline) {
|
||||
kfree(bbio->csum);
|
||||
bbio->csum = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
struct btrfs_bio_stripe {
|
||||
struct btrfs_io_stripe {
|
||||
struct btrfs_device *dev;
|
||||
u64 physical;
|
||||
u64 length; /* only used for discard mappings */
|
||||
};
|
||||
|
||||
struct btrfs_bio {
|
||||
/*
|
||||
* Context for IO submission for device stripe.
|
||||
*
|
||||
* - Track the unfinished mirrors for mirror based profiles
|
||||
* Mirror based profiles are SINGLE/DUP/RAID1/RAID10.
|
||||
*
|
||||
* - Contain the logical -> physical mapping info
|
||||
* Used by submit_stripe_bio() for mapping logical bio
|
||||
* into physical device address.
|
||||
*
|
||||
* - Contain device replace info
|
||||
* Used by handle_ops_on_dev_replace() to copy logical bios
|
||||
* into the new device.
|
||||
*
|
||||
* - Contain RAID56 full stripe logical bytenrs
|
||||
*/
|
||||
struct btrfs_io_context {
|
||||
refcount_t refs;
|
||||
atomic_t stripes_pending;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
@@ -361,7 +398,7 @@ struct btrfs_bio {
|
||||
* so raid_map[0] is the start of our full stripe
|
||||
*/
|
||||
u64 *raid_map;
|
||||
struct btrfs_bio_stripe stripes[];
|
||||
struct btrfs_io_stripe stripes[];
|
||||
};
|
||||
|
||||
struct btrfs_device_info {
|
||||
@@ -396,11 +433,11 @@ struct map_lookup {
|
||||
int num_stripes;
|
||||
int sub_stripes;
|
||||
int verified_stripes; /* For mount time dev extent verification */
|
||||
struct btrfs_bio_stripe stripes[];
|
||||
struct btrfs_io_stripe stripes[];
|
||||
};
|
||||
|
||||
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
|
||||
(sizeof(struct btrfs_bio_stripe) * (n)))
|
||||
(sizeof(struct btrfs_io_stripe) * (n)))
|
||||
|
||||
struct btrfs_balance_args;
|
||||
struct btrfs_balance_progress;
|
||||
@@ -414,6 +451,22 @@ struct btrfs_balance_control {
|
||||
struct btrfs_balance_progress stat;
|
||||
};
|
||||
|
||||
/*
|
||||
* Search for a given device by the set parameters
|
||||
*/
|
||||
struct btrfs_dev_lookup_args {
|
||||
u64 devid;
|
||||
u8 *uuid;
|
||||
u8 *fsid;
|
||||
bool missing;
|
||||
};
|
||||
|
||||
/* We have to initialize to -1 because BTRFS_DEV_REPLACE_DEVID is 0 */
|
||||
#define BTRFS_DEV_LOOKUP_ARGS_INIT { .devid = (u64)-1 }
|
||||
|
||||
#define BTRFS_DEV_LOOKUP_ARGS(name) \
|
||||
struct btrfs_dev_lookup_args name = BTRFS_DEV_LOOKUP_ARGS_INIT
|
||||
|
||||
enum btrfs_map_op {
|
||||
BTRFS_MAP_READ,
|
||||
BTRFS_MAP_WRITE,
|
||||
@@ -437,20 +490,20 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
void btrfs_get_bbio(struct btrfs_bio *bbio);
|
||||
void btrfs_put_bbio(struct btrfs_bio *bbio);
|
||||
void btrfs_get_bioc(struct btrfs_io_context *bioc);
|
||||
void btrfs_put_bioc(struct btrfs_io_context *bioc);
|
||||
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
u64 logical, u64 *length,
|
||||
struct btrfs_bio **bbio_ret, int mirror_num);
|
||||
struct btrfs_io_context **bioc_ret, int mirror_num);
|
||||
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
u64 logical, u64 *length,
|
||||
struct btrfs_bio **bbio_ret);
|
||||
struct btrfs_io_context **bioc_ret);
|
||||
int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map,
|
||||
enum btrfs_map_op op, u64 logical,
|
||||
struct btrfs_io_geometry *io_geom);
|
||||
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
|
||||
u64 type);
|
||||
void btrfs_mapping_tree_free(struct extent_map_tree *tree);
|
||||
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
@@ -467,19 +520,23 @@ void btrfs_assign_next_active_device(struct btrfs_device *device,
|
||||
struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
|
||||
u64 devid,
|
||||
const char *devpath);
|
||||
int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_dev_lookup_args *args,
|
||||
const char *path);
|
||||
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
|
||||
const u64 *devid,
|
||||
const u8 *uuid);
|
||||
void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
|
||||
void btrfs_free_device(struct btrfs_device *device);
|
||||
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
|
||||
const char *device_path, u64 devid,
|
||||
struct btrfs_dev_lookup_args *args,
|
||||
struct block_device **bdev, fmode_t *mode);
|
||||
void __exit btrfs_cleanup_fs_uuids(void);
|
||||
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
|
||||
int btrfs_grow_device(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_device *device, u64 new_size);
|
||||
struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
|
||||
u64 devid, u8 *uuid, u8 *fsid);
|
||||
struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices,
|
||||
const struct btrfs_dev_lookup_args *args);
|
||||
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
|
||||
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
|
||||
int btrfs_balance(struct btrfs_fs_info *fs_info,
|
||||
@@ -493,7 +550,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset);
|
||||
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_uuid_scan_kthread(void *data);
|
||||
int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset);
|
||||
bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
|
||||
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
|
||||
u64 *start, u64 *max_avail);
|
||||
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
|
||||
|
||||
@@ -138,7 +138,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
|
||||
* matches our target xattr, so lets check.
|
||||
*/
|
||||
ret = 0;
|
||||
btrfs_assert_tree_locked(path->nodes[0]);
|
||||
btrfs_assert_tree_write_locked(path->nodes[0]);
|
||||
di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
|
||||
if (!di && !(flags & XATTR_REPLACE)) {
|
||||
ret = -ENOSPC;
|
||||
|
||||
531
fs/btrfs/zoned.c
531
fs/btrfs/zoned.c
@@ -4,6 +4,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/atomic.h>
|
||||
#include "ctree.h"
|
||||
#include "volumes.h"
|
||||
#include "zoned.h"
|
||||
@@ -38,6 +39,16 @@
|
||||
/* Number of superblock log zones */
|
||||
#define BTRFS_NR_SB_LOG_ZONES 2
|
||||
|
||||
/*
|
||||
* Minimum number of active zones we need:
|
||||
*
|
||||
* - BTRFS_SUPER_MIRROR_MAX zones for superblock mirrors
|
||||
* - 3 zones to ensure at least one zone per SYSTEM, META and DATA block group
|
||||
* - 1 zone for tree-log dedicated block group
|
||||
* - 1 zone for relocation
|
||||
*/
|
||||
#define BTRFS_MIN_ACTIVE_ZONES (BTRFS_SUPER_MIRROR_MAX + 5)
|
||||
|
||||
/*
|
||||
* Maximum supported zone size. Currently, SMR disks have a zone size of
|
||||
* 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. We do not
|
||||
@@ -45,6 +56,14 @@
|
||||
*/
|
||||
#define BTRFS_MAX_ZONE_SIZE SZ_8G
|
||||
|
||||
#define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)
|
||||
|
||||
static inline bool sb_zone_is_full(const struct blk_zone *zone)
|
||||
{
|
||||
return (zone->cond == BLK_ZONE_COND_FULL) ||
|
||||
(zone->wp + SUPER_INFO_SECTORS > zone->start + zone->capacity);
|
||||
}
|
||||
|
||||
static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data)
|
||||
{
|
||||
struct blk_zone *zones = data;
|
||||
@@ -60,14 +79,13 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
|
||||
bool empty[BTRFS_NR_SB_LOG_ZONES];
|
||||
bool full[BTRFS_NR_SB_LOG_ZONES];
|
||||
sector_t sector;
|
||||
int i;
|
||||
|
||||
ASSERT(zones[0].type != BLK_ZONE_TYPE_CONVENTIONAL &&
|
||||
zones[1].type != BLK_ZONE_TYPE_CONVENTIONAL);
|
||||
|
||||
empty[0] = (zones[0].cond == BLK_ZONE_COND_EMPTY);
|
||||
empty[1] = (zones[1].cond == BLK_ZONE_COND_EMPTY);
|
||||
full[0] = (zones[0].cond == BLK_ZONE_COND_FULL);
|
||||
full[1] = (zones[1].cond == BLK_ZONE_COND_FULL);
|
||||
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
|
||||
ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL);
|
||||
empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY);
|
||||
full[i] = sb_zone_is_full(&zones[i]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Possible states of log buffer zones
|
||||
@@ -296,6 +314,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
||||
struct btrfs_fs_info *fs_info = device->fs_info;
|
||||
struct btrfs_zoned_device_info *zone_info = NULL;
|
||||
struct block_device *bdev = device->bdev;
|
||||
struct request_queue *queue = bdev_get_queue(bdev);
|
||||
unsigned int max_active_zones;
|
||||
unsigned int nactive;
|
||||
sector_t nr_sectors;
|
||||
sector_t sector = 0;
|
||||
struct blk_zone *zones = NULL;
|
||||
@@ -351,6 +372,17 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
||||
if (!IS_ALIGNED(nr_sectors, zone_sectors))
|
||||
zone_info->nr_zones++;
|
||||
|
||||
max_active_zones = queue_max_active_zones(queue);
|
||||
if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
|
||||
btrfs_err_in_rcu(fs_info,
|
||||
"zoned: %s: max active zones %u is too small, need at least %u active zones",
|
||||
rcu_str_deref(device->name), max_active_zones,
|
||||
BTRFS_MIN_ACTIVE_ZONES);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
zone_info->max_active_zones = max_active_zones;
|
||||
|
||||
zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
|
||||
if (!zone_info->seq_zones) {
|
||||
ret = -ENOMEM;
|
||||
@@ -363,6 +395,12 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
||||
goto out;
|
||||
}
|
||||
|
||||
zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
|
||||
if (!zone_info->active_zones) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
|
||||
if (!zones) {
|
||||
ret = -ENOMEM;
|
||||
@@ -370,6 +408,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
||||
}
|
||||
|
||||
/* Get zones type */
|
||||
nactive = 0;
|
||||
while (sector < nr_sectors) {
|
||||
nr_zones = BTRFS_REPORT_NR_ZONES;
|
||||
ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
|
||||
@@ -380,8 +419,17 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
||||
for (i = 0; i < nr_zones; i++) {
|
||||
if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
|
||||
__set_bit(nreported, zone_info->seq_zones);
|
||||
if (zones[i].cond == BLK_ZONE_COND_EMPTY)
|
||||
switch (zones[i].cond) {
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
__set_bit(nreported, zone_info->empty_zones);
|
||||
break;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
__set_bit(nreported, zone_info->active_zones);
|
||||
nactive++;
|
||||
break;
|
||||
}
|
||||
nreported++;
|
||||
}
|
||||
sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
|
||||
@@ -396,6 +444,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (max_active_zones) {
|
||||
if (nactive > max_active_zones) {
|
||||
btrfs_err_in_rcu(device->fs_info,
|
||||
"zoned: %u active zones on %s exceeds max_active_zones %u",
|
||||
nactive, rcu_str_deref(device->name),
|
||||
max_active_zones);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
atomic_set(&zone_info->active_zones_left,
|
||||
max_active_zones - nactive);
|
||||
}
|
||||
|
||||
/* Validate superblock log */
|
||||
nr_zones = BTRFS_NR_SB_LOG_ZONES;
|
||||
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
|
||||
@@ -478,6 +539,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
||||
out:
|
||||
kfree(zones);
|
||||
out_free_zone_info:
|
||||
bitmap_free(zone_info->active_zones);
|
||||
bitmap_free(zone_info->empty_zones);
|
||||
bitmap_free(zone_info->seq_zones);
|
||||
kfree(zone_info);
|
||||
@@ -493,6 +555,7 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
|
||||
if (!zone_info)
|
||||
return;
|
||||
|
||||
bitmap_free(zone_info->active_zones);
|
||||
bitmap_free(zone_info->seq_zones);
|
||||
bitmap_free(zone_info->empty_zones);
|
||||
kfree(zone_info);
|
||||
@@ -585,7 +648,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
|
||||
|
||||
/*
|
||||
* stripe_size is always aligned to BTRFS_STRIPE_LEN in
|
||||
* __btrfs_alloc_chunk(). Since we want stripe_len == zone_size,
|
||||
* btrfs_create_chunk(). Since we want stripe_len == zone_size,
|
||||
* check the alignment here.
|
||||
*/
|
||||
if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) {
|
||||
@@ -664,7 +727,7 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
|
||||
reset = &zones[1];
|
||||
|
||||
if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
|
||||
ASSERT(reset->cond == BLK_ZONE_COND_FULL);
|
||||
ASSERT(sb_zone_is_full(reset));
|
||||
|
||||
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
|
||||
reset->start, reset->len,
|
||||
@@ -676,9 +739,20 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
|
||||
reset->wp = reset->start;
|
||||
}
|
||||
} else if (ret != -ENOENT) {
|
||||
/* For READ, we want the precious one */
|
||||
/*
|
||||
* For READ, we want the previous one. Move write pointer to
|
||||
* the end of a zone, if it is at the head of a zone.
|
||||
*/
|
||||
u64 zone_end = 0;
|
||||
|
||||
if (wp == zones[0].start << SECTOR_SHIFT)
|
||||
wp = (zones[1].start + zones[1].len) << SECTOR_SHIFT;
|
||||
zone_end = zones[1].start + zones[1].capacity;
|
||||
else if (wp == zones[1].start << SECTOR_SHIFT)
|
||||
zone_end = zones[0].start + zones[0].capacity;
|
||||
if (zone_end)
|
||||
wp = ALIGN_DOWN(zone_end << SECTOR_SHIFT,
|
||||
BTRFS_SUPER_INFO_SIZE);
|
||||
|
||||
wp -= BTRFS_SUPER_INFO_SIZE;
|
||||
}
|
||||
|
||||
@@ -771,36 +845,56 @@ static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo,
|
||||
return true;
|
||||
}
|
||||
|
||||
void btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
|
||||
int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
|
||||
{
|
||||
struct btrfs_zoned_device_info *zinfo = device->zone_info;
|
||||
struct blk_zone *zone;
|
||||
int i;
|
||||
|
||||
if (!is_sb_log_zone(zinfo, mirror))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror];
|
||||
if (zone->cond != BLK_ZONE_COND_FULL) {
|
||||
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
|
||||
/* Advance the next zone */
|
||||
if (zone->cond == BLK_ZONE_COND_FULL) {
|
||||
zone++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (zone->cond == BLK_ZONE_COND_EMPTY)
|
||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||
|
||||
zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);
|
||||
zone->wp += SUPER_INFO_SECTORS;
|
||||
|
||||
if (zone->wp == zone->start + zone->len)
|
||||
if (sb_zone_is_full(zone)) {
|
||||
/*
|
||||
* No room left to write new superblock. Since
|
||||
* superblock is written with REQ_SYNC, it is safe to
|
||||
* finish the zone now.
|
||||
*
|
||||
* If the write pointer is exactly at the capacity,
|
||||
* explicit ZONE_FINISH is not necessary.
|
||||
*/
|
||||
if (zone->wp != zone->start + zone->capacity) {
|
||||
int ret;
|
||||
|
||||
ret = blkdev_zone_mgmt(device->bdev,
|
||||
REQ_OP_ZONE_FINISH, zone->start,
|
||||
zone->len, GFP_NOFS);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
zone->wp = zone->start + zone->len;
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
|
||||
return;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
zone++;
|
||||
ASSERT(zone->cond != BLK_ZONE_COND_FULL);
|
||||
if (zone->cond == BLK_ZONE_COND_EMPTY)
|
||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||
|
||||
zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);
|
||||
|
||||
if (zone->wp == zone->start + zone->len)
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
/* All the zones are FULL. Should not reach here. */
|
||||
ASSERT(0);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
|
||||
@@ -895,6 +989,41 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
|
||||
return pos;
|
||||
}
|
||||
|
||||
/*
 * Account the zone containing byte offset @pos on @device as active.
 *
 * Returns true if the zone is (or already was) accounted as active, or if the
 * device does not limit the number of active zones. Returns false if no
 * active-zone slot could be taken from the device's remaining budget.
 */
static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos)
{
	struct btrfs_zoned_device_info *zinfo = device->zone_info;
	unsigned int zone_idx = (pos >> zinfo->zone_size_shift);

	/* A limit of zero means any number of zones may be used */
	if (zinfo->max_active_zones == 0)
		return true;

	/* Already accounted, no slot needs to be taken */
	if (test_bit(zone_idx, zinfo->active_zones))
		return true;

	/* Try to take one slot out of the remaining budget */
	if (atomic_dec_if_positive(&zinfo->active_zones_left) < 0)
		return false;

	/* Someone else set the bit in the meantime: give the slot back */
	if (test_and_set_bit(zone_idx, zinfo->active_zones))
		atomic_inc(&zinfo->active_zones_left);

	return true;
}
|
||||
|
||||
/*
 * Drop the active accounting of the zone containing byte offset @pos on
 * @device. If the zone was marked active, its slot is returned to the
 * device's budget of active zones.
 */
static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos)
{
	struct btrfs_zoned_device_info *zinfo = device->zone_info;
	unsigned int zone_idx;

	/* A limit of zero means any number of zones may be used */
	if (zinfo->max_active_zones == 0)
		return;

	zone_idx = (pos >> zinfo->zone_size_shift);
	if (!test_and_clear_bit(zone_idx, zinfo->active_zones))
		return;

	atomic_inc(&zinfo->active_zones_left);
}
|
||||
|
||||
int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
|
||||
u64 length, u64 *bytes)
|
||||
{
|
||||
@@ -910,6 +1039,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
|
||||
*bytes = length;
|
||||
while (length) {
|
||||
btrfs_dev_set_zone_empty(device, physical);
|
||||
btrfs_dev_clear_active_zone(device, physical);
|
||||
physical += device->zone_info->zone_size;
|
||||
length -= device->zone_info->zone_size;
|
||||
}
|
||||
@@ -1039,6 +1169,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
int i;
|
||||
unsigned int nofs_flag;
|
||||
u64 *alloc_offsets = NULL;
|
||||
u64 *caps = NULL;
|
||||
unsigned long *active = NULL;
|
||||
u64 last_alloc = 0;
|
||||
u32 num_sequential = 0, num_conventional = 0;
|
||||
|
||||
@@ -1063,10 +1195,28 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
|
||||
map = em->map_lookup;
|
||||
|
||||
cache->physical_map = kmemdup(map, map_lookup_size(map->num_stripes), GFP_NOFS);
|
||||
if (!cache->physical_map) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS);
|
||||
if (!alloc_offsets) {
|
||||
free_extent_map(em);
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS);
|
||||
if (!caps) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
active = bitmap_zalloc(map->num_stripes, GFP_NOFS);
|
||||
if (!active) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
@@ -1131,6 +1281,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
goto out;
|
||||
}
|
||||
|
||||
caps[i] = (zone.capacity << SECTOR_SHIFT);
|
||||
|
||||
switch (zone.cond) {
|
||||
case BLK_ZONE_COND_OFFLINE:
|
||||
case BLK_ZONE_COND_READONLY:
|
||||
@@ -1144,14 +1296,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
alloc_offsets[i] = 0;
|
||||
break;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
alloc_offsets[i] = fs_info->zone_size;
|
||||
alloc_offsets[i] = caps[i];
|
||||
break;
|
||||
default:
|
||||
/* Partially used zone */
|
||||
alloc_offsets[i] =
|
||||
((zone.wp - zone.start) << SECTOR_SHIFT);
|
||||
__set_bit(i, active);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Consider a zone as active if we can allow any number of
|
||||
* active zones.
|
||||
*/
|
||||
if (!device->zone_info->max_active_zones)
|
||||
__set_bit(i, active);
|
||||
}
|
||||
|
||||
if (num_sequential > 0)
|
||||
@@ -1169,6 +1329,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
* calculate_alloc_pointer() which takes extent buffer
|
||||
* locks to avoid deadlock.
|
||||
*/
|
||||
|
||||
/* Zone capacity is always zone size in emulation */
|
||||
cache->zone_capacity = cache->length;
|
||||
if (new) {
|
||||
cache->alloc_offset = 0;
|
||||
goto out;
|
||||
@@ -1195,6 +1358,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
goto out;
|
||||
}
|
||||
cache->alloc_offset = alloc_offsets[0];
|
||||
cache->zone_capacity = caps[0];
|
||||
cache->zone_is_active = test_bit(0, active);
|
||||
break;
|
||||
case BTRFS_BLOCK_GROUP_DUP:
|
||||
case BTRFS_BLOCK_GROUP_RAID1:
|
||||
@@ -1210,6 +1375,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (cache->zone_is_active) {
|
||||
btrfs_get_block_group(cache);
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
}
|
||||
|
||||
out:
|
||||
if (cache->alloc_offset > fs_info->zone_size) {
|
||||
btrfs_err(fs_info,
|
||||
@@ -1218,6 +1390,14 @@ out:
|
||||
ret = -EIO;
|
||||
}
|
||||
|
||||
if (cache->alloc_offset > cache->zone_capacity) {
|
||||
btrfs_err(fs_info,
|
||||
"zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
|
||||
cache->alloc_offset, cache->zone_capacity,
|
||||
cache->start);
|
||||
ret = -EIO;
|
||||
}
|
||||
|
||||
/* An extent is allocated after the write pointer */
|
||||
if (!ret && num_conventional && last_alloc > cache->alloc_offset) {
|
||||
btrfs_err(fs_info,
|
||||
@@ -1229,6 +1409,12 @@ out:
|
||||
if (!ret)
|
||||
cache->meta_write_pointer = cache->alloc_offset + cache->start;
|
||||
|
||||
if (ret) {
|
||||
kfree(cache->physical_map);
|
||||
cache->physical_map = NULL;
|
||||
}
|
||||
bitmap_free(active);
|
||||
kfree(caps);
|
||||
kfree(alloc_offsets);
|
||||
free_extent_map(em);
|
||||
|
||||
@@ -1243,17 +1429,15 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
|
||||
return;
|
||||
|
||||
WARN_ON(cache->bytes_super != 0);
|
||||
unusable = cache->alloc_offset - cache->used;
|
||||
free = cache->length - cache->alloc_offset;
|
||||
unusable = (cache->alloc_offset - cache->used) +
|
||||
(cache->length - cache->zone_capacity);
|
||||
free = cache->zone_capacity - cache->alloc_offset;
|
||||
|
||||
/* We only need ->free_space in ALLOC_SEQ block groups */
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
cache->free_space_ctl->free_space = free;
|
||||
cache->zone_unusable = unusable;
|
||||
|
||||
/* Should not have any excluded extents. Just in case, though */
|
||||
btrfs_free_excluded_extents(cache);
|
||||
}
|
||||
|
||||
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
|
||||
@@ -1304,6 +1488,17 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
|
||||
if (!is_data_inode(&inode->vfs_inode))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Using REQ_OP_ZONE_APPEND for relocation can break assumptions on the
|
||||
* extent layout the relocation code has.
|
||||
* Furthermore we have set aside a dedicated block group from which only the
|
||||
* relocation "process" can allocate and make sure only one process at a
|
||||
* time can add pages to an extent that gets relocated, so it's safe to
|
||||
* use regular REQ_OP_WRITE for this special case.
|
||||
*/
|
||||
if (btrfs_is_data_reloc_root(inode->root))
|
||||
return false;
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, start);
|
||||
ASSERT(cache);
|
||||
if (!cache)
|
||||
@@ -1440,27 +1635,27 @@ int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 len
|
||||
static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
|
||||
struct blk_zone *zone)
|
||||
{
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
u64 mapped_length = PAGE_SIZE;
|
||||
unsigned int nofs_flag;
|
||||
int nmirrors;
|
||||
int i, ret;
|
||||
|
||||
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
|
||||
&mapped_length, &bbio);
|
||||
if (ret || !bbio || mapped_length < PAGE_SIZE) {
|
||||
btrfs_put_bbio(bbio);
|
||||
&mapped_length, &bioc);
|
||||
if (ret || !bioc || mapped_length < PAGE_SIZE) {
|
||||
btrfs_put_bioc(bioc);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
|
||||
if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
nmirrors = (int)bbio->num_stripes;
|
||||
nmirrors = (int)bioc->num_stripes;
|
||||
for (i = 0; i < nmirrors; i++) {
|
||||
u64 physical = bbio->stripes[i].physical;
|
||||
struct btrfs_device *dev = bbio->stripes[i].dev;
|
||||
u64 physical = bioc->stripes[i].physical;
|
||||
struct btrfs_device *dev = bioc->stripes[i].dev;
|
||||
|
||||
/* Missing device */
|
||||
if (!dev->bdev)
|
||||
@@ -1530,3 +1725,251 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
/**
|
||||
* Activate block group and underlying device zones
|
||||
*
|
||||
* @block_group: the block group to activate
|
||||
*
|
||||
* Return: true on success, false otherwise
|
||||
*/
|
||||
bool btrfs_zone_activate(struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
struct map_lookup *map;
|
||||
struct btrfs_device *device;
|
||||
u64 physical;
|
||||
bool ret;
|
||||
|
||||
if (!btrfs_is_zoned(block_group->fs_info))
|
||||
return true;
|
||||
|
||||
map = block_group->physical_map;
|
||||
/* Currently support SINGLE profile only */
|
||||
ASSERT(map->num_stripes == 1);
|
||||
device = map->stripes[0].dev;
|
||||
physical = map->stripes[0].physical;
|
||||
|
||||
if (device->zone_info->max_active_zones == 0)
|
||||
return true;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
|
||||
if (block_group->zone_is_active) {
|
||||
ret = true;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* No space left */
|
||||
if (block_group->alloc_offset == block_group->zone_capacity) {
|
||||
ret = false;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (!btrfs_dev_set_active_zone(device, physical)) {
|
||||
/* Cannot activate the zone */
|
||||
ret = false;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* Successfully activated all the zones */
|
||||
block_group->zone_is_active = 1;
|
||||
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
/* For the active block group list */
|
||||
btrfs_get_block_group(block_group);
|
||||
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
ASSERT(list_empty(&block_group->active_bg_list));
|
||||
list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
|
||||
return true;
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&block_group->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_zone_finish(struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
struct map_lookup *map;
|
||||
struct btrfs_device *device;
|
||||
u64 physical;
|
||||
int ret = 0;
|
||||
|
||||
if (!btrfs_is_zoned(fs_info))
|
||||
return 0;
|
||||
|
||||
map = block_group->physical_map;
|
||||
/* Currently support SINGLE profile only */
|
||||
ASSERT(map->num_stripes == 1);
|
||||
|
||||
device = map->stripes[0].dev;
|
||||
physical = map->stripes[0].physical;
|
||||
|
||||
if (device->zone_info->max_active_zones == 0)
|
||||
return 0;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
if (!block_group->zone_is_active) {
|
||||
spin_unlock(&block_group->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check if we have unwritten allocated space */
|
||||
if ((block_group->flags &
|
||||
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
|
||||
block_group->alloc_offset > block_group->meta_write_pointer) {
|
||||
spin_unlock(&block_group->lock);
|
||||
return -EAGAIN;
|
||||
}
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
ret = btrfs_inc_block_group_ro(block_group, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Ensure all writes in this block group finish */
|
||||
btrfs_wait_block_group_reservations(block_group);
|
||||
/* No need to wait for NOCOW writers. Zoned mode does not allow that. */
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
|
||||
block_group->length);
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
|
||||
/*
|
||||
* Bail out if someone already deactivated the block group, or
|
||||
* allocated space is left in the block group.
|
||||
*/
|
||||
if (!block_group->zone_is_active) {
|
||||
spin_unlock(&block_group->lock);
|
||||
btrfs_dec_block_group_ro(block_group);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (block_group->reserved) {
|
||||
spin_unlock(&block_group->lock);
|
||||
btrfs_dec_block_group_ro(block_group);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
block_group->zone_is_active = 0;
|
||||
block_group->alloc_offset = block_group->zone_capacity;
|
||||
block_group->free_space_ctl->free_space = 0;
|
||||
btrfs_clear_treelog_bg(block_group);
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
|
||||
physical >> SECTOR_SHIFT,
|
||||
device->zone_info->zone_size >> SECTOR_SHIFT,
|
||||
GFP_NOFS);
|
||||
btrfs_dec_block_group_ro(block_group);
|
||||
|
||||
if (!ret) {
|
||||
btrfs_dev_clear_active_zone(device, physical);
|
||||
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
ASSERT(!list_empty(&block_group->active_bg_list));
|
||||
list_del_init(&block_group->active_bg_list);
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
|
||||
/* For active_bg_list */
|
||||
btrfs_put_block_group(block_group);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, int raid_index)
|
||||
{
|
||||
struct btrfs_device *device;
|
||||
bool ret = false;
|
||||
|
||||
if (!btrfs_is_zoned(fs_devices->fs_info))
|
||||
return true;
|
||||
|
||||
/* Non-single profiles are not supported yet */
|
||||
if (raid_index != BTRFS_RAID_SINGLE)
|
||||
return false;
|
||||
|
||||
/* Check if there is a device with active zones left */
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
||||
struct btrfs_zoned_device_info *zinfo = device->zone_info;
|
||||
|
||||
if (!device->bdev)
|
||||
continue;
|
||||
|
||||
if (!zinfo->max_active_zones ||
|
||||
atomic_read(&zinfo->active_zones_left)) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Deactivate the block group containing @logical once a write of @length
 * bytes at @logical has reached the end of the block group's zone capacity.
 *
 * Nothing is done for a non-zoned filesystem, when the write ends before the
 * zone capacity, or when the block group is not active.
 */
void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
{
	struct btrfs_block_group *block_group;
	struct map_lookup *map;
	struct btrfs_device *device;
	u64 physical;

	if (!btrfs_is_zoned(fs_info))
		return;

	block_group = btrfs_lookup_block_group(fs_info, logical);
	ASSERT(block_group);
	/*
	 * Same pattern as btrfs_use_zone_append(): never dereference a failed
	 * lookup, even if the ASSERT() above did not stop execution.
	 */
	if (!block_group)
		return;

	if (logical + length < block_group->start + block_group->zone_capacity)
		goto out;

	spin_lock(&block_group->lock);

	if (!block_group->zone_is_active) {
		spin_unlock(&block_group->lock);
		goto out;
	}

	block_group->zone_is_active = 0;
	/* We should have consumed all the free space */
	ASSERT(block_group->alloc_offset == block_group->zone_capacity);
	ASSERT(block_group->free_space_ctl->free_space == 0);
	btrfs_clear_treelog_bg(block_group);
	spin_unlock(&block_group->lock);

	map = block_group->physical_map;
	device = map->stripes[0].dev;
	physical = map->stripes[0].physical;

	if (!device->zone_info->max_active_zones)
		goto out;

	btrfs_dev_clear_active_zone(device, physical);

	spin_lock(&fs_info->zone_active_bgs_lock);
	ASSERT(!list_empty(&block_group->active_bg_list));
	list_del_init(&block_group->active_bg_list);
	spin_unlock(&fs_info->zone_active_bgs_lock);

	/* Drop the reference held for active_bg_list */
	btrfs_put_block_group(block_group);

out:
	/* Drop the reference taken by btrfs_lookup_block_group() */
	btrfs_put_block_group(block_group);
}
|
||||
|
||||
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bg->fs_info;
|
||||
|
||||
spin_lock(&fs_info->relocation_bg_lock);
|
||||
if (fs_info->data_reloc_bg == bg->start)
|
||||
fs_info->data_reloc_bg = 0;
|
||||
spin_unlock(&fs_info->relocation_bg_lock);
|
||||
}
|
||||
|
||||
@@ -23,8 +23,11 @@ struct btrfs_zoned_device_info {
|
||||
u64 zone_size;
|
||||
u8 zone_size_shift;
|
||||
u32 nr_zones;
|
||||
unsigned int max_active_zones;
|
||||
atomic_t active_zones_left;
|
||||
unsigned long *seq_zones;
|
||||
unsigned long *empty_zones;
|
||||
unsigned long *active_zones;
|
||||
struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX];
|
||||
};
|
||||
|
||||
@@ -40,7 +43,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
|
||||
u64 *bytenr_ret);
|
||||
int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
|
||||
u64 *bytenr_ret);
|
||||
void btrfs_advance_sb_log(struct btrfs_device *device, int mirror);
|
||||
int btrfs_advance_sb_log(struct btrfs_device *device, int mirror);
|
||||
int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror);
|
||||
u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
|
||||
u64 hole_end, u64 num_bytes);
|
||||
@@ -66,6 +69,13 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
|
||||
u64 physical_start, u64 physical_pos);
|
||||
struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
|
||||
u64 logical, u64 length);
|
||||
bool btrfs_zone_activate(struct btrfs_block_group *block_group);
|
||||
int btrfs_zone_finish(struct btrfs_block_group *block_group);
|
||||
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
|
||||
int raid_index);
|
||||
void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
|
||||
u64 length);
|
||||
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
|
||||
#else /* CONFIG_BLK_DEV_ZONED */
|
||||
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
|
||||
struct blk_zone *zone)
|
||||
@@ -113,8 +123,10 @@ static inline int btrfs_sb_log_location(struct btrfs_device *device, int mirror,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
|
||||
{ }
|
||||
static inline int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
|
||||
{
|
||||
@@ -199,6 +211,27 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_BLK_DEV_ZONED: always reports success. */
static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_BLK_DEV_ZONED: nothing to finish, returns 0. */
static inline int btrfs_zone_finish(struct btrfs_block_group *block_group)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_BLK_DEV_ZONED: always returns true. */
static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
|
||||
int raid_index)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Stub for builds without CONFIG_BLK_DEV_ZONED: intentionally a no-op. */
static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
|
||||
u64 logical, u64 length) { }
|
||||
|
||||
/* Stub for builds without CONFIG_BLK_DEV_ZONED: intentionally a no-op. */
static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
|
||||
|
||||
#endif
|
||||
|
||||
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
|
||||
|
||||
@@ -1782,12 +1782,13 @@ EXPORT_SYMBOL(generic_update_time);
|
||||
* This does the actual work of updating an inodes time or version. Must have
|
||||
* had called mnt_want_write() before calling this.
|
||||
*/
|
||||
static int update_time(struct inode *inode, struct timespec64 *time, int flags)
|
||||
int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
|
||||
{
|
||||
if (inode->i_op->update_time)
|
||||
return inode->i_op->update_time(inode, time, flags);
|
||||
return generic_update_time(inode, time, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(inode_update_time);
|
||||
|
||||
/**
|
||||
* atime_needs_update - update the access time
|
||||
@@ -1857,7 +1858,7 @@ void touch_atime(const struct path *path)
|
||||
* of the fs read only, e.g. subvolumes in Btrfs.
|
||||
*/
|
||||
now = current_time(inode);
|
||||
update_time(inode, &now, S_ATIME);
|
||||
inode_update_time(inode, &now, S_ATIME);
|
||||
__mnt_drop_write(mnt);
|
||||
skip_update:
|
||||
sb_end_write(inode->i_sb);
|
||||
@@ -2002,7 +2003,7 @@ int file_update_time(struct file *file)
|
||||
if (__mnt_want_write_file(file))
|
||||
return 0;
|
||||
|
||||
ret = update_time(inode, &now, sync_it);
|
||||
ret = inode_update_time(inode, &now, sync_it);
|
||||
__mnt_drop_write_file(file);
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -2496,6 +2496,8 @@ enum file_time_flags {
|
||||
|
||||
extern bool atime_needs_update(const struct path *, struct inode *);
|
||||
extern void touch_atime(const struct path *);
|
||||
int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
|
||||
|
||||
static inline void file_accessed(struct file *file)
|
||||
{
|
||||
if (!(file->f_flags & O_NOATIME))
|
||||
|
||||
@@ -771,10 +771,16 @@ struct btrfs_ioctl_received_subvol_args {
|
||||
*/
|
||||
#define BTRFS_SEND_FLAG_OMIT_END_CMD 0x4
|
||||
|
||||
/*
|
||||
* Read the protocol version in the structure
|
||||
*/
|
||||
#define BTRFS_SEND_FLAG_VERSION 0x8
|
||||
|
||||
#define BTRFS_SEND_FLAG_MASK \
|
||||
(BTRFS_SEND_FLAG_NO_FILE_DATA | \
|
||||
BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
|
||||
BTRFS_SEND_FLAG_OMIT_END_CMD)
|
||||
BTRFS_SEND_FLAG_OMIT_END_CMD | \
|
||||
BTRFS_SEND_FLAG_VERSION)
|
||||
|
||||
struct btrfs_ioctl_send_args {
|
||||
__s64 send_fd; /* in */
|
||||
@@ -782,7 +788,8 @@ struct btrfs_ioctl_send_args {
|
||||
__u64 __user *clone_sources; /* in */
|
||||
__u64 parent_root; /* in */
|
||||
__u64 flags; /* in */
|
||||
__u64 reserved[4]; /* in */
|
||||
__u32 version; /* in */
|
||||
__u8 reserved[28]; /* in */
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user