From d27b2dcdb8d28a9435c04c9767ba18f89642a148 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 2 Nov 2021 07:24:20 -0700 Subject: [PATCH 01/21] string: uninline memcpy_and_pad commit 5c4e0a21fae877a7ef89be6dcc6263ec672372b8 upstream. When building m68k:allmodconfig, recent versions of gcc generate the following error if the length of UTS_RELEASE is less than 8 bytes. In function 'memcpy_and_pad', inlined from 'nvmet_execute_disc_identify' at drivers/nvme/target/discovery.c:268:2: arch/m68k/include/asm/string.h:72:25: error: '__builtin_memcpy' reading 8 bytes from a region of size 7 Discussions around the problem suggest that this only happens if an architecture does not provide strlen(), if -ffreestanding is provided as compiler option, and if CONFIG_FORTIFY_SOURCE=n. All of this is the case for m68k. The exact reasons are unknown, but seem to be related to the ability of the compiler to evaluate the return value of strlen() and the resulting execution flow in memcpy_and_pad(). It would be possible to work around the problem by using sizeof(UTS_RELEASE) instead of strlen(UTS_RELEASE), but that would only postpone the problem until the function is called in a similar way. Uninline memcpy_and_pad() instead to solve the problem for good. Suggested-by: Linus Torvalds Reviewed-by: Geert Uytterhoeven Acked-by: Andy Shevchenko Signed-off-by: Guenter Roeck Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/string.h | 19 ++----------------- lib/string_helpers.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/include/linux/string.h b/include/linux/string.h index 5e96d656be7a..d68097b4f600 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -262,23 +262,8 @@ void __write_overflow(void) __compiletime_error("detected write beyond size of o #include #endif -/** - * memcpy_and_pad - Copy one buffer to another with padding - * @dest: Where to copy to - * @dest_len: The destination buffer size - * @src: Where to copy from - * @count: The number of bytes to copy - * @pad: Character to use for padding if space is left in destination. - */ -static inline void memcpy_and_pad(void *dest, size_t dest_len, - const void *src, size_t count, int pad) -{ - if (dest_len > count) { - memcpy(dest, src, count); - memset(dest + count, pad, dest_len - count); - } else - memcpy(dest, src, dest_len); -} +void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, + int pad); /** * str_has_prefix - Test if a string has a given prefix diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 3806a52ce697..2ddc10bd9add 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -696,3 +696,23 @@ void kfree_strarray(char **array, size_t n) kfree(array); } EXPORT_SYMBOL_GPL(kfree_strarray); + +/** + * memcpy_and_pad - Copy one buffer to another with padding + * @dest: Where to copy to + * @dest_len: The destination buffer size + * @src: Where to copy from + * @count: The number of bytes to copy + * @pad: Character to use for padding if space is left in destination. + */ +void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, + int pad) +{ + if (dest_len > count) { + memcpy(dest, src, count); + memset(dest + count, pad, dest_len - count); + } else { + memcpy(dest, src, dest_len); + } +} +EXPORT_SYMBOL(memcpy_and_pad); From 3256c84aaddc803382f9818945e85f2e796b9128 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 19 Nov 2021 12:30:10 +0100 Subject: [PATCH 02/21] Revert "drm: fb_helper: improve CONFIG_FB dependency" This reverts commit 94e18f5a5dd1b5e3b89c665fc5ff780858b1c9f6 which is commit 9d6366e743f37d36ef69347924ead7bcc596076e upstream. It causes some build problems as reported by Jiri. Link: https://lore.kernel.org/r/9fdb2bf1-de52-1b9d-4783-c61ce39e8f51@kernel.org Reported-by: Jiri Slaby Cc: Jani Nikula Cc: Javier Martinez Canillas Cc: Arnd Bergmann Cc: Kees Cook Cc: Daniel Vetter Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 6ae4269118af..9199f53861ca 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -102,8 +102,9 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS config DRM_FBDEV_EMULATION bool "Enable legacy fbdev support for your modesetting driver" - depends on DRM_KMS_HELPER - depends on FB=y || FB=DRM_KMS_HELPER + depends on DRM + depends on FB=y || FB=DRM + select DRM_KMS_HELPER select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT From b06962406eca3d0ca818e2cda96e1b2e1f82bc94 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 19 Nov 2021 12:30:13 +0100 Subject: [PATCH 03/21] Revert "drm: fb_helper: fix CONFIG_FB dependency" This reverts commit c95380ba527ae0aee29b2a133c5d0c481d472759 which is commit 606b102876e3741851dfb09d53f3ee57f650a52c upstream. It causes some build problems as reported by Jiri. Link: https://lore.kernel.org/r/9fdb2bf1-de52-1b9d-4783-c61ce39e8f51@kernel.org Reported-by: Jiri Slaby Cc: Arnd Bergmann Cc: Kees Cook Cc: Daniel Vetter Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 9199f53861ca..cea777ae7fb9 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -103,7 +103,7 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS config DRM_FBDEV_EMULATION bool "Enable legacy fbdev support for your modesetting driver" depends on DRM - depends on FB=y || FB=DRM + depends on FB select DRM_KMS_HELPER select FB_CFB_FILLRECT select FB_CFB_COPYAREA From 19e32bd1cc37c34683a214242bee7a0b114831bd Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 14 Nov 2021 08:59:02 +0000 Subject: [PATCH 04/21] KVM: Fix steal time asm constraints commit 964b7aa0b040bdc6ec1c543ee620cda3f8b4c68a upstream. In 64-bit mode, x86 instruction encoding allows us to use the low 8 bits of any GPR as an 8-bit operand. In 32-bit mode, however, we can only use the [abcd] registers. For which, GCC has the "q" constraint instead of the less restrictive "r". Also fix st->preempted, which is an input/output operand rather than an input. Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status") Reported-by: kernel test robot Signed-off-by: David Woodhouse Message-Id: <89bf72db1b859990355f9c40713a34e0d2d86c98.camel@infradead.org> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3de93d4bf16b..c48e2b5729c5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3242,9 +3242,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) "xor %1, %1\n" "2:\n" _ASM_EXTABLE_UA(1b, 2b) - : "+r" (st_preempted), - "+&r" (err) - : "m" (st->preempted)); + : "+q" (st_preempted), + "+&r" (err), + "+m" (st->preempted)); if (err) goto out; From 02c5e9e992a221a2435b7f732aba3ac11784e7ac Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 18 Nov 2021 17:58:13 +0900 Subject: [PATCH 05/21] btrfs: introduce btrfs_is_data_reloc_root commit 37f00a6d2e9c97d6e7b5c3d47c49b714c3d0b99f upstream There are several places in our codebase where we check if a root is the root of the data reloc tree and subsequent patches will introduce more. Factor out the check into a small helper function instead of open coding it multiple times. Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h | 5 +++++ fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/inode.c | 19 ++++++++----------- fs/btrfs/relocation.c | 3 +-- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c0cebcf745ce..5b990881052c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3842,6 +3842,11 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info) return fs_info->zoned != 0; } +static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root) +{ + return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID; +} + /* * We use page status Private2 to indicate there is an ordered extent with * unfinished IO. diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6965ed081346..f63d8a7e6dae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1500,7 +1500,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) goto fail; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && - root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { + !btrfs_is_data_reloc_root(root)) { set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0ab456cb4bf8..45e020c9fdc9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2376,7 +2376,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, out: btrfs_free_path(path); - if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + if (btrfs_is_data_reloc_root(root)) WARN_ON(ret > 0); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7c096ab9bb5e..85663bccde8a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1151,7 +1151,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, * fails during the stage where it updates the bytenr of file extent * items. */ - if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + if (btrfs_is_data_reloc_root(root)) min_alloc_size = num_bytes; else min_alloc_size = fs_info->sectorsize; @@ -1187,8 +1187,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (ret) goto out_drop_extent_cache; - if (root->root_key.objectid == - BTRFS_DATA_RELOC_TREE_OBJECTID) { + if (btrfs_is_data_reloc_root(root)) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); /* @@ -1504,8 +1503,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page, int *page_started, unsigned long *nr_written) { const bool is_space_ino = btrfs_is_free_space_inode(inode); - const bool is_reloc_ino = (inode->root->root_key.objectid == - BTRFS_DATA_RELOC_TREE_OBJECTID); + const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root); const u64 range_bytes = end + 1 - start; struct extent_io_tree *io_tree = &inode->io_tree; u64 range_start = start; @@ -1867,8 +1865,7 @@ out_check: btrfs_dec_nocow_writers(fs_info, disk_bytenr); nocow = false; - if (root->root_key.objectid == - BTRFS_DATA_RELOC_TREE_OBJECTID) + if (btrfs_is_data_reloc_root(root)) /* * Error handled later, as we must prevent * extent_clear_unlock_delalloc() in error handler @@ -2207,7 +2204,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode, if (btrfs_is_testing(fs_info)) return; - if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && + if (!btrfs_is_data_reloc_root(root) && do_list && !(state->state & EXTENT_NORESERVE) && (*bits & EXTENT_CLEAR_DATA_RESV)) btrfs_free_reserved_data_space_noquota(fs_info, len); @@ -2532,7 +2529,7 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, goto mapit; } else if (async && !skip_sum) { /* csum items have already been cloned */ - if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + if (btrfs_is_data_reloc_root(root)) goto mapit; /* we're doing a write, do the async checksumming */ ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags, @@ -3304,7 +3301,7 @@ unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset, u64 file_offset = pg_off + page_offset(page); int ret; - if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && + if (btrfs_is_data_reloc_root(root) && test_range_bit(io_tree, file_offset, file_offset + sectorsize - 1, EXTENT_NODATASUM, 1, NULL)) { @@ -4005,7 +4002,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * without delay */ if (!btrfs_is_free_space_inode(inode) - && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID + && !btrfs_is_data_reloc_root(root) && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { btrfs_update_root_times(trans, root); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 914d403b4415..2e4f109723af 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4386,8 +4386,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, if (!rc) return 0; - BUG_ON(rc->stage == UPDATE_DATA_PTRS && - root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); + BUG_ON(rc->stage == UPDATE_DATA_PTRS && btrfs_is_data_reloc_root(root)); level = btrfs_header_level(buf); if (btrfs_header_generation(buf) <= From d282dd7f4109a6a5fa9f8bb52cd4c23ed510f44e Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 18 Nov 2021 17:58:14 +0900 Subject: [PATCH 06/21] btrfs: zoned: add a dedicated data relocation block group commit c2707a25562343511bf9a3a6a636a16a822204eb upstream Relocation in a zoned filesystem can fail with a transaction abort with error -22 (EINVAL). This happens because the relocation code assumes that the extents we relocated the data to have the same size the source extents had and ensures this by preallocating the extents. But in a zoned filesystem we currently can't preallocate the extents as this would break the sequential write required rule. Therefore it can happen that the writeback process kicks in while we're still adding pages to a delalloc range and starts writing out dirty pages. This then creates destination extents that are smaller than the source extents, triggering the following safety check in get_new_location(): 1034 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1035 ret = -EINVAL; 1036 goto out; 1037 } Temporarily create a dedicated block group for the relocation process, so no non-relocation data writes can interfere with the relocation writes. This is needed that we can switch the relocation process on a zoned filesystem from the REQ_OP_ZONE_APPEND writing we use for data to a scheme like in a non-zoned filesystem using REQ_OP_WRITE and preallocation. Fixes: 32430c614844 ("btrfs: zoned: enable relocation on a zoned filesystem") Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 1 + fs/btrfs/ctree.h | 7 ++++++ fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 54 ++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/zoned.c | 10 ++++++++ fs/btrfs/zoned.h | 3 +++ 6 files changed, 74 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index a3b830b8410a..a53ebc52bd51 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -902,6 +902,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cluster->refill_lock); btrfs_clear_treelog_bg(block_group); + btrfs_clear_data_reloc_bg(block_group); path = btrfs_alloc_path(); if (!path) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5b990881052c..ae06ad559353 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1017,6 +1017,13 @@ struct btrfs_fs_info { spinlock_t treelog_bg_lock; u64 treelog_bg; + /* + * Start of the dedicated data relocation block group, protected by + * relocation_bg_lock. + */ + spinlock_t relocation_bg_lock; + u64 data_reloc_bg; + #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; struct rb_root block_tree; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f63d8a7e6dae..e00c4c1f622f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2883,6 +2883,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); spin_lock_init(&fs_info->treelog_bg_lock); + spin_lock_init(&fs_info->relocation_bg_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reclaim_bgs_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 45e020c9fdc9..87c23c5c0f26 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3495,6 +3495,9 @@ struct find_free_extent_ctl { /* Allocation is called for tree-log */ bool for_treelog; + /* Allocation is called for data relocation */ + bool for_data_reloc; + /* RAID index, converted from flags */ int index; @@ -3756,6 +3759,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, u64 avail; u64 bytenr = block_group->start; u64 log_bytenr; + u64 data_reloc_bytenr; int ret = 0; bool skip; @@ -3773,13 +3777,31 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, if (skip) return 1; + /* + * Do not allow non-relocation blocks in the dedicated relocation block + * group, and vice versa. + */ + spin_lock(&fs_info->relocation_bg_lock); + data_reloc_bytenr = fs_info->data_reloc_bg; + if (data_reloc_bytenr && + ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) || + (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr))) + skip = true; + spin_unlock(&fs_info->relocation_bg_lock); + if (skip) + return 1; + spin_lock(&space_info->lock); spin_lock(&block_group->lock); spin_lock(&fs_info->treelog_bg_lock); + spin_lock(&fs_info->relocation_bg_lock); ASSERT(!ffe_ctl->for_treelog || block_group->start == fs_info->treelog_bg || fs_info->treelog_bg == 0); + ASSERT(!ffe_ctl->for_data_reloc || + block_group->start == fs_info->data_reloc_bg || + fs_info->data_reloc_bg == 0); if (block_group->ro) { ret = 1; @@ -3796,6 +3818,16 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, goto out; } + /* + * Do not allow currently used block group to be the data relocation + * dedicated block group. + */ + if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg && + (block_group->used || block_group->reserved)) { + ret = 1; + goto out; + } + avail = block_group->length - block_group->alloc_offset; if (avail < num_bytes) { if (ffe_ctl->max_extent_size < avail) { @@ -3813,6 +3845,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, if (ffe_ctl->for_treelog && !fs_info->treelog_bg) fs_info->treelog_bg = block_group->start; + if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg) + fs_info->data_reloc_bg = block_group->start; + ffe_ctl->found_offset = start + block_group->alloc_offset; block_group->alloc_offset += num_bytes; spin_lock(&ctl->tree_lock); @@ -3829,6 +3864,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, out: if (ret && ffe_ctl->for_treelog) fs_info->treelog_bg = 0; + if (ret && ffe_ctl->for_data_reloc) + fs_info->data_reloc_bg = 0; + spin_unlock(&fs_info->relocation_bg_lock); spin_unlock(&fs_info->treelog_bg_lock); spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); @@ -4085,6 +4123,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, ffe_ctl->hint_byte = fs_info->treelog_bg; spin_unlock(&fs_info->treelog_bg_lock); } + if (ffe_ctl->for_data_reloc) { + spin_lock(&fs_info->relocation_bg_lock); + if (fs_info->data_reloc_bg) + ffe_ctl->hint_byte = fs_info->data_reloc_bg; + spin_unlock(&fs_info->relocation_bg_lock); + } return 0; default: BUG(); @@ -4129,6 +4173,8 @@ static noinline int find_free_extent(struct btrfs_root *root, struct btrfs_space_info *space_info; bool full_search = false; bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); + bool for_data_reloc = (btrfs_is_data_reloc_root(root) && + flags & BTRFS_BLOCK_GROUP_DATA); WARN_ON(num_bytes < fs_info->sectorsize); @@ -4143,6 +4189,7 @@ static noinline int find_free_extent(struct btrfs_root *root, ffe_ctl.found_offset = 0; ffe_ctl.hint_byte = hint_byte_orig; ffe_ctl.for_treelog = for_treelog; + ffe_ctl.for_data_reloc = for_data_reloc; ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED; /* For clustered allocation */ @@ -4220,6 +4267,8 @@ search: if (unlikely(block_group->ro)) { if (for_treelog) btrfs_clear_treelog_bg(block_group); + if (ffe_ctl.for_data_reloc) + btrfs_clear_data_reloc_bg(block_group); continue; } @@ -4408,6 +4457,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 flags; int ret; bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); + bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data); flags = get_alloc_profile_by_root(root, is_data); again: @@ -4431,8 +4481,8 @@ again: sinfo = btrfs_find_space_info(fs_info, flags); btrfs_err(fs_info, - "allocation failed flags %llu, wanted %llu tree-log %d", - flags, num_bytes, for_treelog); + "allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d", + flags, num_bytes, for_treelog, for_data_reloc); if (sinfo) btrfs_dump_space_info(fs_info, sinfo, num_bytes, 1); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 47af1ab3bf12..18ee85e1c9a2 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1530,3 +1530,13 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, return device; } + +void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) +{ + struct btrfs_fs_info *fs_info = bg->fs_info; + + spin_lock(&fs_info->relocation_bg_lock); + if (fs_info->data_reloc_bg == bg->start) + fs_info->data_reloc_bg = 0; + spin_unlock(&fs_info->relocation_bg_lock); +} diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 4b299705bb12..70b3be517599 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -66,6 +66,7 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical, u64 physical_start, u64 physical_pos); struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, u64 logical, u64 length); +void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg); #else /* CONFIG_BLK_DEV_ZONED */ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) @@ -199,6 +200,8 @@ static inline struct btrfs_device *btrfs_zoned_get_device( return ERR_PTR(-EOPNOTSUPP); } +static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { } + #endif static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) From f716e9827838f1554a770deec16c2968b9445b50 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 18 Nov 2021 17:58:15 +0900 Subject: [PATCH 07/21] btrfs: zoned: only allow one process to add pages to a relocation inode commit 35156d852762b58855f513b4f8bb7f32d69dc9c5 upstream Don't allow more than one process to add pages to a relocation inode on a zoned filesystem, otherwise we cannot guarantee the sequential write rule once we're filling preallocated extents on a zoned filesystem. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aaddd7225348..a40fb9c74dda 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5120,6 +5120,9 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, int extent_writepages(struct address_space *mapping, struct writeback_control *wbc) { + struct inode *inode = mapping->host; + const bool data_reloc = btrfs_is_data_reloc_root(BTRFS_I(inode)->root); + const bool zoned = btrfs_is_zoned(BTRFS_I(inode)->root->fs_info); int ret = 0; struct extent_page_data epd = { .bio_ctrl = { 0 }, @@ -5127,7 +5130,15 @@ int extent_writepages(struct address_space *mapping, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; + /* + * Allow only a single thread to do the reloc work in zoned mode to + * protect the write pointer updates. + */ + if (data_reloc && zoned) + btrfs_inode_lock(inode, 0); ret = extent_write_cache_pages(mapping, wbc, &epd); + if (data_reloc && zoned) + btrfs_inode_unlock(inode, 0); ASSERT(ret <= 0); if (ret < 0) { end_write_bio(&epd, ret); From 080f457f35c0218b497ea709301545b69b113184 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 18 Nov 2021 17:58:16 +0900 Subject: [PATCH 08/21] btrfs: zoned: use regular writes for relocation commit e6d261e3b1f777b499ce8f535ed44dd1b69278b7 upstream Now that we have a dedicated block group for relocation, we can use REQ_OP_WRITE instead of REQ_OP_ZONE_APPEND for writing out the data on relocation. Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/zoned.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 18ee85e1c9a2..5672c24a2d58 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1304,6 +1304,17 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) if (!is_data_inode(&inode->vfs_inode)) return false; + /* + * Using REQ_OP_ZONE_APPNED for relocation can break assumptions on the + * extent layout the relocation code has. + * Furthermore we have set aside own block-group from which only the + * relocation "process" can allocate and make sure only one process at a + * time can add pages to an extent that gets relocated, so it's safe to + * use regular REQ_OP_WRITE for this special case. + */ + if (btrfs_is_data_reloc_root(inode->root)) + return false; + cache = btrfs_lookup_block_group(fs_info, start); ASSERT(cache); if (!cache) From d0fdafa8fcf30e0cfbe44349563b847a1b759ba0 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 18 Nov 2021 17:58:17 +0900 Subject: [PATCH 09/21] btrfs: check for relocation inodes on zoned btrfs in should_nocow commit 2adada886b26e998b5a624e72f0834ebfdc54cc7 upstream Prepare for allowing preallocation for relocation inodes. Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 85663bccde8a..61b4651f008d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1945,7 +1945,15 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page const bool zoned = btrfs_is_zoned(inode->root->fs_info); if (should_nocow(inode, start, end)) { - ASSERT(!zoned); + /* + * Normally on a zoned device we're only doing COW writes, but + * in case of relocation on a zoned filesystem we have taken + * precaution, that we're only writing sequentially. It's safe + * to use run_delalloc_nocow() here, like for regular + * preallocated inodes. + */ + ASSERT(!zoned || + (zoned && btrfs_is_data_reloc_root(inode->root))); ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, nr_written); } else if (!inode_can_compress(inode) || From 0fc2241ac237dcfa037f97f734145d45fa6ebf3d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 18 Nov 2021 17:58:18 +0900 Subject: [PATCH 10/21] btrfs: zoned: allow preallocation for relocation inodes commit 960a3166aed015887cd54423a6589ae4d0b65bd5 upstream Now that we use a dedicated block group and regular writes for data relocation, we can preallocate the space needed for a relocated inode, just like we do in regular mode. Essentially this reverts commit 32430c614844 ("btrfs: zoned: enable relocation on a zoned filesystem") as it is not needed anymore. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 35 ++--------------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2e4f109723af..d81bee621d37 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2852,31 +2852,6 @@ static noinline_for_stack int prealloc_file_extent_cluster( if (ret) return ret; - /* - * On a zoned filesystem, we cannot preallocate the file region. - * Instead, we dirty and fiemap_write the region. - */ - if (btrfs_is_zoned(inode->root->fs_info)) { - struct btrfs_root *root = inode->root; - struct btrfs_trans_handle *trans; - - end = cluster->end - offset + 1; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode); - i_size_write(&inode->vfs_inode, end); - ret = btrfs_update_inode(trans, root, inode); - if (ret) { - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); - return ret; - } - - return btrfs_end_transaction(trans); - } - btrfs_inode_lock(&inode->vfs_inode, 0); for (nr = 0; nr < cluster->nr; nr++) { start = cluster->boundary[nr] - offset; @@ -3084,7 +3059,6 @@ release_page: static int relocate_file_extent_cluster(struct inode *inode, struct file_extent_cluster *cluster) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 offset = BTRFS_I(inode)->index_cnt; unsigned long index; unsigned long last_index; @@ -3114,8 +3088,6 @@ static int relocate_file_extent_cluster(struct inode *inode, for (index = (cluster->start - offset) >> PAGE_SHIFT; index <= last_index && !ret; index++) ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index); - if (btrfs_is_zoned(fs_info) && !ret) - ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); if (ret == 0) WARN_ON(cluster_nr != cluster->nr); out: @@ -3770,12 +3742,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_inode_item *item; struct extent_buffer *leaf; - u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; int ret; - if (btrfs_is_zoned(trans->fs_info)) - flags &= ~BTRFS_INODE_PREALLOC; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -3790,7 +3758,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_generation(leaf, item, 1); btrfs_set_inode_size(leaf, item, 0); btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, item, flags); + btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | + BTRFS_INODE_PREALLOC); btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); From 8d0956438eecfafbefb7cd15b7773087549b38d4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 May 2021 21:51:10 -0700 Subject: [PATCH 11/21] fortify: Explicitly disable Clang support commit a52f8a59aef46b59753e583bf4b28fccb069ce64 upstream. Clang has never correctly compiled the FORTIFY_SOURCE defenses due to a couple bugs: Eliding inlines with matching __builtin_* names https://bugs.llvm.org/show_bug.cgi?id=50322 Incorrect __builtin_constant_p() of some globals https://bugs.llvm.org/show_bug.cgi?id=41459 In the process of making improvements to the FORTIFY_SOURCE defenses, the first (silent) bug (coincidentally) becomes worked around, but exposes the latter which breaks the build. As such, Clang must not be used with CONFIG_FORTIFY_SOURCE until at least latter bug is fixed (in Clang 13), and the fortify routines have been rearranged. Update the Kconfig to reflect the reality of the current situation. Signed-off-by: Kees Cook Acked-by: Nick Desaulniers Link: https://lore.kernel.org/lkml/CAKwvOd=A+ueGV2ihdy5GtgR2fQbcXjjAtVxv3=cPjffpebZB7A@mail.gmail.com Cc: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- security/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/Kconfig b/security/Kconfig index 0ced7fd33e4d..fe6c0395fa02 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -191,6 +191,9 @@ config HARDENED_USERCOPY_PAGESPAN config FORTIFY_SOURCE bool "Harden common str/mem functions against buffer overflows" depends on ARCH_HAS_FORTIFY_SOURCE + # https://bugs.llvm.org/show_bug.cgi?id=50322 + # https://bugs.llvm.org/show_bug.cgi?id=41459 + depends on !CC_IS_CLANG help Detect overflows of buffers in common string and memory functions where the compiler can determine and validate the buffer sizes. From 1f124a6611910f8c875f7ff3ac84727c41a4ab72 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Tue, 26 Oct 2021 22:40:12 +0800 Subject: [PATCH 12/21] block: Add a helper to validate the block size commit 570b1cac477643cbf01a45fa5d018430a1fddbce upstream. There are some duplicated codes to validate the block size in block drivers. This limitation actually comes from block layer, so this patch tries to add a new block layer helper for that. Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20211026144015.188-2-xieyongji@bytedance.com Signed-off-by: Jens Axboe Cc: Tadeusz Struk Signed-off-by: Greg Kroah-Hartman --- include/linux/blkdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 683aee365420..0a9fdcbbab83 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -235,6 +235,14 @@ struct request { void *end_io_data; }; +static inline int blk_validate_block_size(unsigned int bsize) +{ + if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) + return -EINVAL; + + return 0; +} + static inline bool blk_op_is_passthrough(unsigned int op) { op &= REQ_OP_MASK; From c0991182aca3d8d115de1d75ce912d8221b8a551 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Tue, 26 Oct 2021 22:40:14 +0800 Subject: [PATCH 13/21] loop: Use blk_validate_block_size() to validate block size commit af3c570fb0df422b4906ebd11c1bf363d89961d5 upstream. Remove loop_validate_block_size() and use the block layer helper to validate block size. Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20211026144015.188-4-xieyongji@bytedance.com Signed-off-by: Jens Axboe Cc: Tadeusz Struk Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7bf4686af774..dfc72a1f6500 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -272,19 +272,6 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_mq_unfreeze_queue(lo->lo_queue); } -/** - * loop_validate_block_size() - validates the passed in block size - * @bsize: size to validate - */ -static int -loop_validate_block_size(unsigned short bsize) -{ - if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) - return -EINVAL; - - return 0; -} - /** * loop_set_size() - sets device size and notifies userspace * @lo: struct loop_device to set the size for @@ -1236,7 +1223,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, } if (config->block_size) { - error = loop_validate_block_size(config->block_size); + error = blk_validate_block_size(config->block_size); if (error) goto out_unlock; } @@ -1759,7 +1746,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (lo->lo_state != Lo_bound) return -ENXIO; - err = loop_validate_block_size(arg); + err = blk_validate_block_size(arg); if (err) return err; From 73f1e74f9c87cf36be5429c9a18358742806d7ed Mon Sep 17 00:00:00 2001 From: Nicholas Flintham Date: Thu, 30 Sep 2021 09:22:39 +0100 Subject: [PATCH 14/21] Bluetooth: btusb: Add support for TP-Link UB500 Adapter commit 4fd6d490796171bf786090fee782e252186632e4 upstream. Add support for TP-Link UB500 Adapter (RTL8761B) * /sys/kernel/debug/usb/devices T: Bus=01 Lev=02 Prnt=05 Port=01 Cnt=01 Dev#= 78 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2357 ProdID=0604 Rev= 2.00 S: Manufacturer= S: Product=TP-Link UB500 Adapter S: SerialNumber=E848B8C82000 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Nicholas Flintham Signed-off-by: Marcel Holtmann Cc: Szabolcs Sipos Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 60d2fce59a71..79d0db542da3 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -433,6 +433,10 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0bda, 0xb009), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x2ff8, 0xb011), .driver_info = BTUSB_REALTEK }, + /* Additional Realtek 8761B Bluetooth devices */ + { USB_DEVICE(0x2357, 0x0604), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, + /* Additional Realtek 8761BU Bluetooth devices */ { USB_DEVICE(0x0b05, 0x190e), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, From a912418410ab64eec86d31cc166b4df48b7bd9e9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 13 Nov 2021 20:41:17 +0100 Subject: [PATCH 15/21] parisc/entry: fix trace test in syscall exit path commit 3ec18fc7831e7d79e2d536dd1f3bc0d3ba425e8a upstream. commit 8779e05ba8aa ("parisc: Fix ptrace check on syscall return") fixed testing of TI_FLAGS. This uncovered a bug in the test mask. syscall_restore_rfi is only used when the kernel needs to exit to usespace with single or block stepping and the recovery counter enabled. The test however used _TIF_SYSCALL_TRACE_MASK, which includes a lot of bits that shouldn't be tested here. Fix this by using TIF_SINGLESTEP and TIF_BLOCKSTEP directly. I encountered this bug by enabling syscall tracepoints. Both in qemu and on real hardware. As soon as i enabled the tracepoint (sys_exit_read, but i guess it doesn't really matter which one), i got random page faults in userspace almost immediately. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 2716e58b498b..437c8d31f390 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1835,7 +1835,7 @@ syscall_restore: /* Are we being ptraced? */ LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 - ldi _TIF_SYSCALL_TRACE_MASK,%r2 + ldi _TIF_SINGLESTEP|_TIF_BLOCKSTEP,%r2 and,COND(=) %r19,%r2,%r0 b,n syscall_restore_rfi From 06ce633b3bfd1077922c9ce76e0da0c3d1c527ed Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 4 Nov 2021 18:01:29 +0000 Subject: [PATCH 16/21] PCI/MSI: Deal with devices lying about their MSI mask capability commit 2226667a145db2e1f314d7f57fd644fe69863ab9 upstream. It appears that some devices are lying about their mask capability, pretending that they don't have it, while they actually do. The net result is that now that we don't enable MSIs on such endpoint. Add a new per-device flag to deal with this. Further patches will make use of it, sadly. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20211104180130.3825416-2-maz@kernel.org Cc: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/msi.c | 3 +++ include/linux/pci.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index ea6371eb9b25..e2dedfa9072d 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -477,6 +477,9 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) goto out; pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); + /* Lies, damned lies, and MSIs */ + if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING) + control |= PCI_MSI_FLAGS_MASKBIT; entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); diff --git a/include/linux/pci.h b/include/linux/pci.h index cd8aa6fce204..152a4d74f87f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -233,6 +233,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), /* Don't use Relaxed Ordering for TLPs directed at this device */ PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), + /* Device does honor MSI masking despite saying otherwise */ + PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), }; enum pci_irq_reroute_variant { From 7931b7e318827104b2bfd4c2210aa8a870258609 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 4 Nov 2021 18:01:30 +0000 Subject: [PATCH 17/21] PCI: Add MSI masking quirk for Nvidia ION AHCI commit f21082fb20dbfb3e42b769b59ef21c2a7f2c7c1f upstream. The ION AHCI device pretends that MSI masking isn't a thing, while it actually implements it and needs MSIs to be unmasked to work. Add a quirk to that effect. Reported-by: Rui Salvaterra Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Rui Salvaterra Reviewed-by: Thomas Gleixner Cc: Bjorn Helgaas Link: https://lore.kernel.org/r/CALjTZvbzYfBuLB+H=fj2J+9=DxjQ2Uqcy0if_PvmJ-nU-qEgkg@mail.gmail.com Link: https://lore.kernel.org/r/20211104180130.3825416-3-maz@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6c957124f84d..208fa03acdda 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5796,3 +5796,9 @@ static void apex_pci_fixup_class(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_CLASS_HEADER(0x1ac1, 0x089a, PCI_CLASS_NOT_DEFINED, 8, apex_pci_fixup_class); + +static void nvidia_ion_ahci_fixup(struct pci_dev *pdev) +{ + pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup); From bd378dcd503194301eb7ee87a77342c49a0d0bcb Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Wed, 10 Nov 2021 18:18:14 -0800 Subject: [PATCH 18/21] perf/core: Avoid put_page() when GUP fails commit 4716023a8f6a0f4a28047f14dd7ebdc319606b84 upstream. PEBS PERF_SAMPLE_PHYS_ADDR events use perf_virt_to_phys() to convert PMU sampled virtual addresses to physical using get_user_page_fast_only() and page_to_phys(). Some get_user_page_fast_only() error cases return false, indicating no page reference, but still initialize the output page pointer with an unreferenced page. In these error cases perf_virt_to_phys() calls put_page(). This causes page reference count underflow, which can lead to unintentional page sharing. Fix perf_virt_to_phys() to only put_page() if get_user_page_fast_only() returns a referenced page. Fixes: fc7ce9c74c3ad ("perf/core, x86: Add PERF_SAMPLE_PHYS_ADDR") Signed-off-by: Greg Thelen Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211111021814.757086-1-gthelen@google.com Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f23ca260307f..7162b600e7ea 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7154,7 +7154,6 @@ void perf_output_sample(struct perf_output_handle *handle, static u64 perf_virt_to_phys(u64 virt) { u64 phys_addr = 0; - struct page *p = NULL; if (!virt) return 0; @@ -7173,14 +7172,15 @@ static u64 perf_virt_to_phys(u64 virt) * If failed, leave phys_addr as 0. */ if (current->mm != NULL) { + struct page *p; + pagefault_disable(); - if (get_user_page_fast_only(virt, 0, &p)) + if (get_user_page_fast_only(virt, 0, &p)) { phys_addr = page_to_phys(p) + virt % PAGE_SIZE; + put_page(p); + } pagefault_enable(); } - - if (p) - put_page(p); } return phys_addr; From ef2590a5305e0b8e9342f84c2214aa478ee7f28e Mon Sep 17 00:00:00 2001 From: Subbaraman Narayanamurthy Date: Thu, 4 Nov 2021 16:57:07 -0700 Subject: [PATCH 19/21] thermal: Fix NULL pointer dereferences in of_thermal_ functions commit 96cfe05051fd8543cdedd6807ec59a0e6c409195 upstream. of_parse_thermal_zones() parses the thermal-zones node and registers a thermal_zone device for each subnode. However, if a thermal zone is consuming a thermal sensor and that thermal sensor device hasn't probed yet, an attempt to set trip_point_*_temp for that thermal zone device can cause a NULL pointer dereference. Fix it. console:/sys/class/thermal/thermal_zone87 # echo 120000 > trip_point_0_temp ... Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 ... Call trace: of_thermal_set_trip_temp+0x40/0xc4 trip_point_temp_store+0xc0/0x1dc dev_attr_store+0x38/0x88 sysfs_kf_write+0x64/0xc0 kernfs_fop_write_iter+0x108/0x1d0 vfs_write+0x2f4/0x368 ksys_write+0x7c/0xec __arm64_sys_write+0x20/0x30 el0_svc_common.llvm.7279915941325364641+0xbc/0x1bc do_el0_svc+0x28/0xa0 el0_svc+0x14/0x24 el0_sync_handler+0x88/0xec el0_sync+0x1c0/0x200 While at it, fix the possible NULL pointer dereference in other functions as well: of_thermal_get_temp(), of_thermal_set_emul_temp(), of_thermal_get_trend(). Suggested-by: David Collins Signed-off-by: Subbaraman Narayanamurthy Acked-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_of.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 6379f26a335f..9233f7e74454 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -89,7 +89,7 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; - if (!data->ops->get_temp) + if (!data->ops || !data->ops->get_temp) return -EINVAL; return data->ops->get_temp(data->sensor_data, temp); @@ -186,6 +186,9 @@ static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; + if (!data->ops || !data->ops->set_emul_temp) + return -EINVAL; + return data->ops->set_emul_temp(data->sensor_data, temp); } @@ -194,7 +197,7 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, { struct __thermal_zone *data = tz->devdata; - if (!data->ops->get_trend) + if (!data->ops || !data->ops->get_trend) return -EINVAL; return data->ops->get_trend(data->sensor_data, trip, trend); @@ -301,7 +304,7 @@ static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip, if (trip >= data->ntrips || trip < 0) return -EDOM; - if (data->ops->set_trip_temp) { + if (data->ops && data->ops->set_trip_temp) { int ret; ret = data->ops->set_trip_temp(data->sensor_data, trip, temp); From 1af7386f5f71f8cb23ea34862b481ed6a33ef538 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 17 Nov 2021 17:05:41 +0100 Subject: [PATCH 20/21] Revert "ACPI: scan: Release PM resources blocked by unused objects" commit 3b2b49e6dfdcf423506a771bf44cee842596351a upstream. Revert commit c10383e8ddf4 ("ACPI: scan: Release PM resources blocked by unused objects"), because it causes boot issues to appear on some platforms. Reported-by: Kyle D. Pelton Reported-by: Saranya Gopal Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/glue.c | 25 ------------------------- drivers/acpi/internal.h | 1 - drivers/acpi/scan.c | 6 ------ 3 files changed, 32 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 1cfafa254e3d..7a33a6d985f8 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -340,28 +340,3 @@ void acpi_device_notify_remove(struct device *dev) acpi_unbind_one(dev); } - -int acpi_dev_turn_off_if_unused(struct device *dev, void *not_used) -{ - struct acpi_device *adev = to_acpi_device(dev); - - /* - * Skip device objects with device IDs, because they may be in use even - * if they are not companions of any physical device objects. - */ - if (adev->pnp.type.hardware_id) - return 0; - - mutex_lock(&adev->physical_node_lock); - - /* - * Device objects without device IDs are not in use if they have no - * corresponding physical device objects. - */ - if (list_empty(&adev->physical_node_list)) - acpi_device_set_power(adev, ACPI_STATE_D3_COLD); - - mutex_unlock(&adev->physical_node_lock); - - return 0; -} diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 8fbdc172864b..d91b560e8867 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -117,7 +117,6 @@ bool acpi_device_is_battery(struct acpi_device *adev); bool acpi_device_is_first_physical_node(struct acpi_device *adev, const struct device *dev); int acpi_bus_register_early_device(int type); -int acpi_dev_turn_off_if_unused(struct device *dev, void *not_used); /* -------------------------------------------------------------------------- Device Matching and Notification diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 770b82483d74..5b54c80b9d32 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2559,12 +2559,6 @@ int __init acpi_scan_init(void) } } - /* - * Make sure that power management resources are not blocked by ACPI - * device objects with no users. - */ - bus_for_each_dev(&acpi_bus_type, NULL, NULL, acpi_dev_turn_off_if_unused); - acpi_turn_off_unused_power_resources(); acpi_scan_initialized = true; From 9ac77cf6e1bd35ef538077245feaee8f1146ca4b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 21 Nov 2021 13:44:15 +0100 Subject: [PATCH 21/21] Linux 5.15.4 Link: https://lore.kernel.org/r/20211119171444.640508836@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Fox Chen Tested-by: Shuah Khan Tested-by: Rudi Heitbaum Tested-by: Guenter Roeck Tested-By: Scott Bruce Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 79eeb13c0973..759e68a02cf0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Trick or Treat