From a75a5d163857cd507255b04f8d11613978434636 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 17 Mar 2021 17:23:31 +0100 Subject: [PATCH 1/9] ext4: check journal inode extents more carefully commit ce9f24cccdc019229b70a5c15e2b09ad9c0ab5d1 upstream. Currently, system zones just track ranges of block, that are "important" fs metadata (bitmaps, group descriptors, journal blocks, etc.). This however complicates how extent tree (or indirect blocks) can be checked for inodes that actually track such metadata - currently the journal inode but arguably we should be treating quota files or resize inode similarly. We cannot run __ext4_ext_check() on such metadata inodes when loading their extents as that would immediately trigger the validity checks and so we just hack around that and special-case the journal inode. This however leads to a situation that a journal inode which has extent tree of depth at least one can have invalid extent tree that gets unnoticed until ext4_cache_extents() crashes. To overcome this limitation, track inode number each system zone belongs to (0 is used for zones not belonging to any inode). We can then verify inode number matches the expected one when verifying extent tree and thus avoid the false errors. With this there's no need to to special-case journal inode during extent tree checking anymore so remove it. Fixes: 0a944e8a6c66 ("ext4: don't perform block validity checks on the journal inode") Reported-by: Wolfgang Frisch Reviewed-by: Lukas Czerner Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20200728130437.7804-4-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/block_validity.c | 43 ++++++++++++++++++++-------------------- fs/ext4/ext4.h | 6 +++--- fs/ext4/extents.c | 16 ++++++--------- fs/ext4/indirect.c | 6 ++---- fs/ext4/inode.c | 5 ++--- fs/ext4/mballoc.c | 4 ++-- 6 files changed, 37 insertions(+), 43 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index f22a89cdb407..1ea8fc9ff048 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -24,6 +24,7 @@ struct ext4_system_zone { struct rb_node node; ext4_fsblk_t start_blk; unsigned int count; + u32 ino; }; static struct kmem_cache *ext4_system_zone_cachep; @@ -45,7 +46,8 @@ void ext4_exit_system_zone(void) static inline int can_merge(struct ext4_system_zone *entry1, struct ext4_system_zone *entry2) { - if ((entry1->start_blk + entry1->count) == entry2->start_blk) + if ((entry1->start_blk + entry1->count) == entry2->start_blk && + entry1->ino == entry2->ino) return 1; return 0; } @@ -66,7 +68,7 @@ static void release_system_zone(struct ext4_system_blocks *system_blks) */ static int add_system_zone(struct ext4_system_blocks *system_blks, ext4_fsblk_t start_blk, - unsigned int count) + unsigned int count, u32 ino) { struct ext4_system_zone *new_entry, *entry; struct rb_node **n = &system_blks->root.rb_node, *node; @@ -89,6 +91,7 @@ static int add_system_zone(struct ext4_system_blocks *system_blks, return -ENOMEM; new_entry->start_blk = start_blk; new_entry->count = count; + new_entry->ino = ino; new_node = &new_entry->node; rb_link_node(new_node, parent, n); @@ -145,7 +148,7 @@ static void debug_print_tree(struct ext4_sb_info *sbi) static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, struct ext4_system_blocks *system_blks, ext4_fsblk_t start_blk, - unsigned int count) + unsigned int count, ino_t ino) { struct ext4_system_zone *entry; struct rb_node *n; @@ -169,7 +172,7 @@ static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, n = n->rb_right; else { sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); - return 0; + return entry->ino == ino; } } return 1; @@ -204,17 +207,16 @@ static int ext4_protect_reserved_inode(struct super_block *sb, if (n == 0) { i++; } else { - if (!ext4_data_block_valid_rcu(sbi, system_blks, - map.m_pblk, n)) { - ext4_error(sb, "blocks %llu-%llu from inode %u " + err = add_system_zone(system_blks, map.m_pblk, n, ino); + if (err < 0) { + if (err == -EFSCORRUPTED) { + ext4_error(sb, + "blocks %llu-%llu from inode %u " "overlap system zone", map.m_pblk, map.m_pblk + map.m_len - 1, ino); - err = -EFSCORRUPTED; + } break; } - err = add_system_zone(system_blks, map.m_pblk, n); - if (err < 0) - break; i += n; } } @@ -259,19 +261,19 @@ int ext4_setup_system_zone(struct super_block *sb) ((i < 5) || ((i % flex_size) == 0))) add_system_zone(system_blks, ext4_group_first_block_no(sb, i), - ext4_bg_num_gdb(sb, i) + 1); + ext4_bg_num_gdb(sb, i) + 1, 0); gdp = ext4_get_group_desc(sb, i, NULL); ret = add_system_zone(system_blks, - ext4_block_bitmap(sb, gdp), 1); + ext4_block_bitmap(sb, gdp), 1, 0); if (ret) goto err; ret = add_system_zone(system_blks, - ext4_inode_bitmap(sb, gdp), 1); + ext4_inode_bitmap(sb, gdp), 1, 0); if (ret) goto err; ret = add_system_zone(system_blks, ext4_inode_table(sb, gdp), - sbi->s_itb_per_group); + sbi->s_itb_per_group, 0); if (ret) goto err; } @@ -320,7 +322,7 @@ void ext4_release_system_zone(struct super_block *sb) call_rcu(&system_blks->rcu, ext4_destroy_system_zone); } -int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, +int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, unsigned int count) { struct ext4_system_blocks *system_blks; @@ -332,9 +334,9 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, * mount option. */ rcu_read_lock(); - system_blks = rcu_dereference(sbi->system_blks); - ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk, - count); + system_blks = rcu_dereference(EXT4_SB(inode->i_sb)->system_blks); + ret = ext4_data_block_valid_rcu(EXT4_SB(inode->i_sb), system_blks, + start_blk, count, inode->i_ino); rcu_read_unlock(); return ret; } @@ -354,8 +356,7 @@ int ext4_check_blockref(const char *function, unsigned int line, while (bref < p+max) { blk = le32_to_cpu(*bref++); if (blk && - unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), - blk, 1))) { + unlikely(!ext4_inode_block_valid(inode, blk, 1))) { es->s_last_error_block = cpu_to_le64(blk); ext4_error_inode(inode, function, line, blk, "invalid block"); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a436307eed00..6938dff9f04b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3180,9 +3180,9 @@ extern void ext4_release_system_zone(struct super_block *sb); extern int ext4_setup_system_zone(struct super_block *sb); extern int __init ext4_init_system_zone(void); extern void ext4_exit_system_zone(void); -extern int ext4_data_block_valid(struct ext4_sb_info *sbi, - ext4_fsblk_t start_blk, - unsigned int count); +extern int ext4_inode_block_valid(struct inode *inode, + ext4_fsblk_t start_blk, + unsigned int count); extern int ext4_check_blockref(const char *, unsigned int, struct inode *, __le32 *, unsigned int); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3a4570e37cb0..36708d9d71cb 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -377,7 +377,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) */ if (lblock + len <= lblock) return 0; - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); + return ext4_inode_block_valid(inode, block, len); } static int ext4_valid_extent_idx(struct inode *inode, @@ -385,7 +385,7 @@ static int ext4_valid_extent_idx(struct inode *inode, { ext4_fsblk_t block = ext4_idx_pblock(ext_idx); - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); + return ext4_inode_block_valid(inode, block, 1); } static int ext4_valid_extent_entries(struct inode *inode, @@ -542,14 +542,10 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - if (!ext4_has_feature_journal(inode->i_sb) || - (inode->i_ino != - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); - if (err) - goto errout; - } + err = __ext4_ext_check(function, line, inode, + ext_block_hdr(bh), depth, pblk); + if (err) + goto errout; set_buffer_verified(bh); /* * If this is a leaf block, cache all of its entries diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index e1801b288847..a5442528a60d 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -842,8 +842,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, else if (ext4_should_journal_data(inode)) flags |= EXT4_FREE_BLOCKS_FORGET; - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, - count)) { + if (!ext4_inode_block_valid(inode, block_to_free, count)) { EXT4_ERROR_INODE(inode, "attempt to clear invalid " "blocks %llu len %lu", (unsigned long long) block_to_free, count); @@ -1005,8 +1004,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (!nr) continue; /* A hole */ - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), - nr, 1)) { + if (!ext4_inode_block_valid(inode, nr, 1)) { EXT4_ERROR_INODE(inode, "invalid indirect mapped " "block %lu (level %d)", diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index edeb837081c8..296ffe92e250 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -421,8 +421,7 @@ static int __check_block_validity(struct inode *inode, const char *func, (inode->i_ino == le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) return 0; - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, - map->m_len)) { + if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, "lblock %lu mapped to illegal pblock %llu " "(length %d)", (unsigned long) map->m_lblk, @@ -5072,7 +5071,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ret = 0; if (ei->i_file_acl && - !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { + !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) { ext4_error_inode(inode, function, line, 0, "iget: bad extended attribute block %llu", ei->i_file_acl); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ec6f65c91d93..db47a06cfb74 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2990,7 +2990,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); - if (!ext4_data_block_valid(sbi, block, len)) { + if (!ext4_inode_block_valid(ac->ac_inode, block, len)) { ext4_error(sb, "Allocating blocks %llu-%llu which overlap " "fs metadata", block, block+len); /* File system mounted not to panic on error @@ -4755,7 +4755,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && - !ext4_data_block_valid(sbi, block, count)) { + !ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks not in datazone - " "block = %llu, count = %lu", block, count); goto error_return; From 23ce6b7d40612b1166301f1abbfe9f4a69de69d0 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 17 Mar 2021 11:50:31 +0000 Subject: [PATCH 2/9] KVM: arm64: nvhe: Save the SPE context early commit b96b0c5de685df82019e16826a282d53d86d112c upstream The nVHE KVM hyp drains and disables the SPE buffer, before entering the guest, as the EL1&0 translation regime is going to be loaded with that of the guest. But this operation is performed way too late, because : - The owning translation regime of the SPE buffer is transferred to EL2. (MDCR_EL2_E2PB == 0) - The guest Stage1 is loaded. Thus the flush could use the host EL1 virtual address, but use the EL2 translations instead of host EL1, for writing out any cached data. Fix this by moving the SPE buffer handling early enough. The restore path is doing the right thing. Cc: stable@vger.kernel.org # v4.19 Cc: Christoffer Dall Cc: Marc Zyngier Cc: Will Deacon Cc: Catalin Marinas Cc: Mark Rutland Cc: Alexandru Elisei Signed-off-by: Suzuki K Poulose Acked-by: Marc Zyngier Signed-off-by: Sasha Levin --- arch/arm64/include/asm/kvm_hyp.h | 3 +++ arch/arm64/kvm/hyp/debug-sr.c | 24 +++++++++++++++--------- arch/arm64/kvm/hyp/switch.c | 4 +++- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 384c34397619..5f52d6d670e9 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -144,6 +144,9 @@ void __sysreg32_restore_state(struct kvm_vcpu *vcpu); void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu); +void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu); +void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); + void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 50009766e5e5..3c5414633bb7 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -149,6 +149,21 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); } +void __hyp_text __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) +{ + /* + * Non-VHE: Disable and flush SPE data generation + * VHE: The vcpu can run, but it can't hide. + */ + __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); + +} + +void __hyp_text __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) +{ + __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); +} + void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; @@ -156,13 +171,6 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - /* - * Non-VHE: Disable and flush SPE data generation - * VHE: The vcpu can run, but it can't hide. - */ - if (!has_vhe()) - __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; @@ -182,8 +190,6 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - if (!has_vhe()) - __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 15312e429b7d..1d16ce0b7e0d 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -560,6 +560,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) guest_ctxt = &vcpu->arch.ctxt; __sysreg_save_state_nvhe(host_ctxt); + __debug_save_host_buffers_nvhe(vcpu); __activate_traps(vcpu); __activate_vm(kern_hyp_va(vcpu->kvm)); @@ -599,11 +600,12 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) __fpsimd_save_fpexc32(vcpu); + __debug_switch_to_host(vcpu); /* * This must come after restoring the host sysregs, since a non-VHE * system may enable SPE here and make use of the TTBRs. */ - __debug_switch_to_host(vcpu); + __debug_restore_host_buffers_nvhe(vcpu); return exit_code; } From bc49612a0e2c379a0d997375901c5371ba015518 Mon Sep 17 00:00:00 2001 From: Piotr Krysiuk Date: Tue, 16 Mar 2021 09:47:02 +0100 Subject: [PATCH 3/9] bpf: Prohibit alu ops for pointer types not defining ptr_limit commit f232326f6966cf2a1d1db7bc917a4ce5f9f55f76 upstream. The purpose of this patch is to streamline error propagation and in particular to propagate retrieve_ptr_limit() errors for pointer types that are not defining a ptr_limit such that register-based alu ops against these types can be rejected. The main rationale is that a gap has been identified by Piotr in the existing protection against speculatively out-of-bounds loads, for example, in case of ctx pointers, unprivileged programs can still perform pointer arithmetic. This can be abused to execute speculatively out-of-bounds loads without restrictions and thus extract contents of kernel memory. Fix this by rejecting unprivileged programs that attempt any pointer arithmetic on unprotected pointer types. The two affected ones are pointer to ctx as well as pointer to map. Field access to a modified ctx' pointer is rejected at a later point in time in the verifier, and 7c6967326267 ("bpf: Permit map_ptr arithmetic with opcode add and offset 0") only relevant for root-only use cases. Risk of unprivileged program breakage is considered very low. Fixes: 7c6967326267 ("bpf: Permit map_ptr arithmetic with opcode add and offset 0") Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") Signed-off-by: Piotr Krysiuk Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index daf0a9637d73..67775334068b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2804,6 +2804,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, u32 alu_state, alu_limit; struct bpf_reg_state tmp; bool ret; + int err; if (can_skip_alu_sanitation(env, insn)) return 0; @@ -2819,10 +2820,13 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; - if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) - return 0; - if (update_alu_sanitation_state(aux, alu_state, alu_limit)) - return -EACCES; + err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg); + if (err < 0) + return err; + + err = update_alu_sanitation_state(aux, alu_state, alu_limit); + if (err < 0) + return err; do_sim: /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of @@ -2920,7 +2924,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case BPF_ADD: ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); if (ret < 0) { - verbose(env, "R%d tried to add from different maps or paths\n", dst); + verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); return ret; } /* We can take a fixed offset as long as it doesn't overflow @@ -2975,7 +2979,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case BPF_SUB: ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); if (ret < 0) { - verbose(env, "R%d tried to sub from different maps or paths\n", dst); + verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); return ret; } if (dst_reg == off_reg) { From ec5307f2ed2377fc55f0a8c990c6004c63014a54 Mon Sep 17 00:00:00 2001 From: Piotr Krysiuk Date: Tue, 16 Mar 2021 08:20:16 +0100 Subject: [PATCH 4/9] bpf: Fix off-by-one for area size in creating mask to left commit 10d2bb2e6b1d8c4576c56a748f697dbeb8388899 upstream. retrieve_ptr_limit() computes the ptr_limit for registers with stack and map_value type. ptr_limit is the size of the memory area that is still valid / in-bounds from the point of the current position and direction of the operation (add / sub). This size will later be used for masking the operation such that attempting out-of-bounds access in the speculative domain is redirected to remain within the bounds of the current map value. When masking to the right the size is correct, however, when masking to the left, the size is off-by-one which would lead to an incorrect mask and thus incorrect arithmetic operation in the non-speculative domain. Piotr found that if the resulting alu_limit value is zero, then the BPF_MOV32_IMM() from the fixup_bpf_calls() rewrite will end up loading 0xffffffff into AX instead of sign-extending to the full 64 bit range, and as a result, this allows abuse for executing speculatively out-of- bounds loads against 4GB window of address space and thus extracting the contents of kernel memory via side-channel. Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") Signed-off-by: Piotr Krysiuk Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 67775334068b..b76391055839 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2740,13 +2740,13 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, case PTR_TO_STACK: off = ptr_reg->off + ptr_reg->var_off.value; if (mask_to_left) - *ptr_limit = MAX_BPF_STACK + off; + *ptr_limit = MAX_BPF_STACK + off + 1; else *ptr_limit = -off; return 0; case PTR_TO_MAP_VALUE: if (mask_to_left) { - *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + *ptr_limit = ptr_reg->umax_value + ptr_reg->off + 1; } else { off = ptr_reg->smin_value + ptr_reg->off; *ptr_limit = ptr_reg->map_ptr->value_size - off; From 087299234e6fa5d2e611df782602dbd9864957b0 Mon Sep 17 00:00:00 2001 From: Piotr Krysiuk Date: Tue, 16 Mar 2021 08:26:25 +0100 Subject: [PATCH 5/9] bpf: Simplify alu_limit masking for pointer arithmetic commit b5871dca250cd391885218b99cc015aca1a51aea upstream. Instead of having the mov32 with aux->alu_limit - 1 immediate, move this operation to retrieve_ptr_limit() instead to simplify the logic and to allow for subsequent sanity boundary checks inside retrieve_ptr_limit(). This avoids in future that at the time of the verifier masking rewrite we'd run into an underflow which would not sign extend due to the nature of mov32 instruction. Signed-off-by: Piotr Krysiuk Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b76391055839..a9f35c9b7760 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2740,16 +2740,16 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, case PTR_TO_STACK: off = ptr_reg->off + ptr_reg->var_off.value; if (mask_to_left) - *ptr_limit = MAX_BPF_STACK + off + 1; + *ptr_limit = MAX_BPF_STACK + off; else - *ptr_limit = -off; + *ptr_limit = -off - 1; return 0; case PTR_TO_MAP_VALUE: if (mask_to_left) { - *ptr_limit = ptr_reg->umax_value + ptr_reg->off + 1; + *ptr_limit = ptr_reg->umax_value + ptr_reg->off; } else { off = ptr_reg->smin_value + ptr_reg->off; - *ptr_limit = ptr_reg->map_ptr->value_size - off; + *ptr_limit = ptr_reg->map_ptr->value_size - off - 1; } return 0; default: @@ -6089,7 +6089,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) off_reg = issrc ? insn->src_reg : insn->dst_reg; if (isneg) *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); - *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); + *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); From 86eddd693daacc0c6746cd789569bd5481d2ea36 Mon Sep 17 00:00:00 2001 From: Piotr Krysiuk Date: Tue, 16 Mar 2021 09:47:02 +0100 Subject: [PATCH 6/9] bpf: Add sanity check for upper ptr_limit commit 1b1597e64e1a610c7a96710fc4717158e98a08b3 upstream. Given we know the max possible value of ptr_limit at the time of retrieving the latter, add basic assertions, so that the verifier can bail out if anything looks odd and reject the program. Nothing triggered this so far, but it also does not hurt to have these. Signed-off-by: Piotr Krysiuk Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a9f35c9b7760..a0cdb6c4a330 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2734,24 +2734,29 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, { bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off; + u32 off, max; switch (ptr_reg->type) { case PTR_TO_STACK: + /* Offset 0 is out-of-bounds, but acceptable start for the + * left direction, see BPF_REG_FP. + */ + max = MAX_BPF_STACK + mask_to_left; off = ptr_reg->off + ptr_reg->var_off.value; if (mask_to_left) *ptr_limit = MAX_BPF_STACK + off; else *ptr_limit = -off - 1; - return 0; + return *ptr_limit >= max ? -ERANGE : 0; case PTR_TO_MAP_VALUE: + max = ptr_reg->map_ptr->value_size; if (mask_to_left) { *ptr_limit = ptr_reg->umax_value + ptr_reg->off; } else { off = ptr_reg->smin_value + ptr_reg->off; *ptr_limit = ptr_reg->map_ptr->value_size - off - 1; } - return 0; + return *ptr_limit >= max ? -ERANGE : 0; default: return -EINVAL; } From cfbfa9c721862acbebab2828734b2b0bd41fe772 Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Tue, 2 Mar 2021 00:01:59 +0800 Subject: [PATCH 7/9] net: dsa: tag_mtk: fix 802.1ad VLAN egress commit 9200f515c41f4cbaeffd8fdd1d8b6373a18b1b67 upstream. A different TPID bit is used for 802.1ad VLAN frames. Reported-by: Ilario Gelmetti Fixes: f0af34317f4b ("net: dsa: mediatek: combine MediaTek tag with VLAN tag") Signed-off-by: DENG Qingfang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dsa/tag_mtk.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 6574425ad94c..1f57dce969f7 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -20,6 +20,7 @@ #define MTK_HDR_LEN 4 #define MTK_HDR_XMIT_UNTAGGED 0 #define MTK_HDR_XMIT_TAGGED_TPID_8100 1 +#define MTK_HDR_XMIT_TAGGED_TPID_88A8 2 #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) #define MTK_HDR_XMIT_SA_DIS BIT(6) @@ -28,8 +29,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); + u8 xmit_tpid; u8 *mtk_tag; - bool is_vlan_skb = true; unsigned char *dest = eth_hdr(skb)->h_dest; bool is_multicast_skb = is_multicast_ether_addr(dest) && !is_broadcast_ether_addr(dest); @@ -40,13 +41,20 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, * the both special and VLAN tag at the same time and then look up VLAN * table with VID. */ - if (!skb_vlan_tagged(skb)) { + switch (skb->protocol) { + case htons(ETH_P_8021Q): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100; + break; + case htons(ETH_P_8021AD): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8; + break; + default: if (skb_cow_head(skb, MTK_HDR_LEN) < 0) return NULL; + xmit_tpid = MTK_HDR_XMIT_UNTAGGED; skb_push(skb, MTK_HDR_LEN); memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); - is_vlan_skb = false; } mtk_tag = skb->data + 2 * ETH_ALEN; @@ -54,8 +62,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, /* Mark tag attribute on special tag insertion to notify hardware * whether that's a combined special tag with 802.1Q header. */ - mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 : - MTK_HDR_XMIT_UNTAGGED; + mtk_tag[0] = xmit_tpid; mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; /* Disable SA learning for multicast frames */ @@ -63,7 +70,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS; /* Tag control information is kept for 802.1Q */ - if (!is_vlan_skb) { + if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) { mtk_tag[2] = 0; mtk_tag[3] = 0; } From 1270080b64f76498667f37b09b5c1a6d0cc9d396 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 22 Feb 2021 14:30:10 -0800 Subject: [PATCH 8/9] net: dsa: b53: Support setting learning on port commit f9b3827ee66cfcf297d0acd6ecf33653a5f297ef upstream. Add support for being able to set the learning attribute on port, and make sure that the standalone ports start up with learning disabled. We can remove the code in bcm_sf2 that configured the ports learning attribute because we want the standalone ports to have learning disabled by default and port 7 cannot be bridged, so its learning attribute will not change past its initial configuration. Signed-off-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/b53/b53_common.c | 19 +++++++++++++++++++ drivers/net/dsa/b53/b53_regs.h | 1 + drivers/net/dsa/bcm_sf2.c | 5 ----- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 335ce1e84904..7eaeab65d39f 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -507,12 +507,27 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) } EXPORT_SYMBOL(b53_imp_vlan_setup); +static void b53_port_set_learning(struct b53_device *dev, int port, + bool learning) +{ + u16 reg; + + b53_read16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, ®); + if (learning) + reg &= ~BIT(port); + else + reg |= BIT(port); + b53_write16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, reg); +} + int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; unsigned int cpu_port = ds->ports[port].cpu_dp->index; u16 pvlan; + b53_port_set_learning(dev, port, false); + /* Clear the Rx and Tx disable bits and set to no spanning tree */ b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), 0); @@ -620,6 +635,7 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port) b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl); b53_brcm_hdr_setup(dev->ds, port); + b53_port_set_learning(dev, port, false); } static void b53_enable_mib(struct b53_device *dev) @@ -1517,6 +1533,8 @@ int b53_br_join(struct dsa_switch *ds, int port, struct net_device *br) b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan); dev->ports[port].vlan_ctl_mask = pvlan; + b53_port_set_learning(dev, port, true); + return 0; } EXPORT_SYMBOL(b53_br_join); @@ -1564,6 +1582,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br) vl->untag |= BIT(port) | BIT(cpu_port); b53_set_vlan_entry(dev, pvid, vl); } + b53_port_set_learning(dev, port, false); } EXPORT_SYMBOL(b53_br_leave); diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index c90985c294a2..b2c539a42154 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -115,6 +115,7 @@ #define B53_UC_FLOOD_MASK 0x32 #define B53_MC_FLOOD_MASK 0x34 #define B53_IPMC_FLOOD_MASK 0x36 +#define B53_DIS_LEARNING 0x3c /* * Override Ports 0-7 State on devices with xMII interfaces (8 bit) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 613f03f9d9ec..e9fe3897bd9c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -173,11 +173,6 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg &= ~P_TXQ_PSM_VDD(port); core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL); - /* Enable learning */ - reg = core_readl(priv, CORE_DIS_LEARN); - reg &= ~BIT(port); - core_writel(priv, reg, CORE_DIS_LEARN); - /* Enable Broadcom tags for that port if requested */ if (priv->brcm_tag_mask & BIT(port)) b53_brcm_hdr_setup(ds, port); From 125222814e7b8f84df767d6ab622aff2a6d2f234 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 20 Mar 2021 10:38:35 +0100 Subject: [PATCH 9/9] Linux 4.19.182 Tested-by: Pavel Machek (CIP) Tested-by: Jason Self Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/20210319121744.114946147@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 29930269b486..5956638e0f4d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 181 +SUBLEVEL = 182 EXTRAVERSION = NAME = "People's Front"