From 6e4dd7d2636d8e8fb634cc9e7fc84eb13928e88a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Aug 2023 13:44:27 +0200 Subject: [PATCH 01/16] x86/cpu: Fix __x86_return_thunk symbol type commit 77f67119004296a9b2503b377d610e08b08afc2a upstream. Commit fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") reimplemented __x86_return_thunk with a mix of SYM_FUNC_START and SYM_CODE_END, this is not a sane combination. Since nothing should ever actually 'CALL' this, make it consistently CODE. Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814121148.571027074@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 30e76fab678a..591d4a2419b5 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -207,7 +207,9 @@ SYM_CODE_END(srso_safe_ret) SYM_FUNC_END(srso_untrain_ret) __EXPORT_THUNK(srso_untrain_ret) -SYM_FUNC_START(__x86_return_thunk) +SYM_CODE_START(__x86_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \ "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS int3 From 8bb1ed390d358f6c167f45a27305b775aa8fc898 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Aug 2023 13:44:28 +0200 Subject: [PATCH 02/16] x86/cpu: Fix up srso_safe_ret() and __x86_return_thunk() commit af023ef335f13c8b579298fc432daeef609a9e60 upstream. vmlinux.o: warning: objtool: srso_untrain_ret() falls through to next function __x86_return_skl() vmlinux.o: warning: objtool: __x86_return_thunk() falls through to next function __x86_return_skl() This is because these functions (can) end with CALL, which objtool does not consider a terminating instruction. Therefore, replace the INT3 instruction (which is a non-fatal trap) with UD2 (which is a fatal-trap). This indicates execution will not continue past this point. Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814121148.637802730@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 591d4a2419b5..271f6560448d 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -202,7 +202,7 @@ SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) int3 lfence call srso_safe_ret - int3 + ud2 SYM_CODE_END(srso_safe_ret) SYM_FUNC_END(srso_untrain_ret) __EXPORT_THUNK(srso_untrain_ret) @@ -212,7 +212,7 @@ SYM_CODE_START(__x86_return_thunk) ANNOTATE_NOENDBR ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \ "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS - int3 + ud2 SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) From 53ebbe1c8c02aa7b7f072dd2f96bca4faa1daa59 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Aug 2023 13:44:30 +0200 Subject: [PATCH 03/16] x86/alternative: Make custom return thunk unconditional commit 095b8303f3835c68ac4a8b6d754ca1c3b6230711 upstream. There is infrastructure to rewrite return thunks to point to any random thunk one desires, unwrap that from CALL_THUNKS, which up to now was the sole user of that. [ bp: Make the thunks visible on 32-bit and add ifdeffery for the 32-bit builds. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814121148.775293785@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 5 +++++ arch/x86/kernel/cpu/bugs.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 31fa631c8587..9a4ffd321d29 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -210,7 +210,12 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; +#ifdef CONFIG_RETHUNK extern void __x86_return_thunk(void); +#else +static inline void __x86_return_thunk(void) {} +#endif + extern void zen_untrain_ret(void); extern void srso_untrain_ret(void); extern void srso_untrain_ret_alias(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d98f33ea57e4..666f5bed2d97 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -62,6 +62,8 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd); static DEFINE_MUTEX(spec_ctrl_mutex); +void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { From 44dbc912fd8a132f01eca8fc0928d6c9beb1f51c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Aug 2023 13:44:31 +0200 Subject: [PATCH 04/16] x86/cpu: Clean up SRSO return thunk mess commit d43490d0ab824023e11d0b57d0aeec17a6e0ca13 upstream. Use the existing configurable return thunk. There is absolute no justification for having created this __x86_return_thunk alternative. To clarify, the whole thing looks like: Zen3/4 does: srso_alias_untrain_ret: nop2 lfence jmp srso_alias_return_thunk int3 srso_alias_safe_ret: // aliasses srso_alias_untrain_ret just so add $8, %rsp ret int3 srso_alias_return_thunk: call srso_alias_safe_ret ud2 While Zen1/2 does: srso_untrain_ret: movabs $foo, %rax lfence call srso_safe_ret (jmp srso_return_thunk ?) int3 srso_safe_ret: // embedded in movabs instruction add $8,%rsp ret int3 srso_return_thunk: call srso_safe_ret ud2 While retbleed does: zen_untrain_ret: test $0xcc, %bl lfence jmp zen_return_thunk int3 zen_return_thunk: // embedded in the test instruction ret int3 Where Zen1/2 flush the BTB entry using the instruction decoder trick (test,movabs) Zen3/4 use BTB aliasing. SRSO adds a return sequence (srso_safe_ret()) which forces the function return instruction to speculate into a trap (UD2). This RET will then mispredict and execution will continue at the return site read from the top of the stack. Pick one of three options at boot (evey function can only ever return once). [ bp: Fixup commit message uarch details and add them in a comment in the code too. Add a comment about the srso_select_mitigation() dependency on retbleed_select_mitigation(). Add moar ifdeffery for 32-bit builds. Add a dummy srso_untrain_ret_alias() definition for 32-bit alternatives needing the symbol. ] Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814121148.842775684@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 5 +++ arch/x86/kernel/cpu/bugs.c | 17 ++++++-- arch/x86/kernel/vmlinux.lds.S | 4 +- arch/x86/lib/retpoline.S | 58 ++++++++++++++++++++-------- tools/objtool/arch/x86/decode.c | 2 +- 5 files changed, 64 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 9a4ffd321d29..71899b8a86e7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -216,9 +216,14 @@ extern void __x86_return_thunk(void); static inline void __x86_return_thunk(void) {} #endif +extern void zen_return_thunk(void); +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); + extern void zen_untrain_ret(void); extern void srso_untrain_ret(void); extern void srso_untrain_ret_alias(void); + extern void entry_ibpb(void); #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 666f5bed2d97..757c973531c5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -166,8 +166,13 @@ void __init cpu_select_mitigations(void) md_clear_select_mitigation(); srbds_select_mitigation(); l1d_flush_select_mitigation(); - gds_select_mitigation(); + + /* + * srso_select_mitigation() depends and must run after + * retbleed_select_mitigation(). + */ srso_select_mitigation(); + gds_select_mitigation(); } /* @@ -1015,6 +1020,9 @@ do_cmd_auto: setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); + if (IS_ENABLED(CONFIG_RETHUNK)) + x86_return_thunk = zen_return_thunk; + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) pr_err(RETBLEED_UNTRAIN_MSG); @@ -2422,10 +2430,13 @@ static void __init srso_select_mitigation(void) */ setup_force_cpu_cap(X86_FEATURE_RETHUNK); - if (boot_cpu_data.x86 == 0x19) + if (boot_cpu_data.x86 == 0x19) { setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); - else + x86_return_thunk = srso_alias_return_thunk; + } else { setup_force_cpu_cap(X86_FEATURE_SRSO); + x86_return_thunk = srso_return_thunk; + } srso_mitigation = SRSO_MITIGATION_SAFE_RET; } else { pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e6939ebb606a..8af3ac61ee5f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -507,8 +507,8 @@ INIT_PER_CPU(irq_stack_backing_store); "fixed_percpu_data is not at start of per-cpu area"); #endif - #ifdef CONFIG_RETHUNK -. = ASSERT((__ret & 0x3f) == 0, "__ret not cacheline-aligned"); +#ifdef CONFIG_RETHUNK +. = ASSERT((zen_return_thunk & 0x3f) == 0, "zen_return_thunk not cacheline-aligned"); . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 271f6560448d..a08a53b31447 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -94,22 +94,27 @@ SYM_CODE_END(__x86_indirect_thunk_array) .section .text.__x86.rethunk_untrain SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) + UNWIND_HINT_FUNC ANNOTATE_NOENDBR ASM_NOP2 lfence - jmp __x86_return_thunk + jmp srso_alias_return_thunk SYM_FUNC_END(srso_untrain_ret_alias) __EXPORT_THUNK(srso_untrain_ret_alias) .section .text.__x86.rethunk_safe +#else +/* dummy definition for alternatives */ +SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(srso_untrain_ret_alias) #endif -/* Needs a definition for the __x86_return_thunk alternative below. */ SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) -#ifdef CONFIG_CPU_SRSO add $8, %_ASM_SP UNWIND_HINT_FUNC -#endif ANNOTATE_UNRET_SAFE ret int3 @@ -117,9 +122,16 @@ SYM_FUNC_END(srso_safe_ret_alias) .section .text.__x86.return_thunk +SYM_CODE_START(srso_alias_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_safe_ret_alias + ud2 +SYM_CODE_END(srso_alias_return_thunk) + /* * Safety details here pertain to the AMD Zen{1,2} microarchitecture: - * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * 1) The RET at zen_return_thunk must be on a 64 byte boundary, for * alignment within the BTB. * 2) The instruction at zen_untrain_ret must contain, and not * end with, the 0xc3 byte of the RET. @@ -127,7 +139,7 @@ SYM_FUNC_END(srso_safe_ret_alias) * from re-poisioning the BTB prediction. */ .align 64 - .skip 64 - (__ret - zen_untrain_ret), 0xcc + .skip 64 - (zen_return_thunk - zen_untrain_ret), 0xcc SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_NOENDBR /* @@ -135,16 +147,16 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) * * TEST $0xcc, %bl * LFENCE - * JMP __x86_return_thunk + * JMP zen_return_thunk * * Executing the TEST instruction has a side effect of evicting any BTB * prediction (potentially attacker controlled) attached to the RET, as - * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + * zen_return_thunk + 1 isn't an instruction boundary at the moment. */ .byte 0xf6 /* - * As executed from __x86_return_thunk, this is a plain RET. + * As executed from zen_return_thunk, this is a plain RET. * * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. * @@ -156,13 +168,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) * With SMT enabled and STIBP active, a sibling thread cannot poison * RET's prediction to a type of its choice, but can evict the * prediction due to competitive sharing. If the prediction is - * evicted, __x86_return_thunk will suffer Straight Line Speculation + * evicted, zen_return_thunk will suffer Straight Line Speculation * which will be contained safely by the INT3. */ -SYM_INNER_LABEL(__ret, SYM_L_GLOBAL) +SYM_INNER_LABEL(zen_return_thunk, SYM_L_GLOBAL) ret int3 -SYM_CODE_END(__ret) +SYM_CODE_END(zen_return_thunk) /* * Ensure the TEST decoding / BTB invalidation is complete. @@ -173,7 +185,7 @@ SYM_CODE_END(__ret) * Jump back and execute the RET in the middle of the TEST instruction. * INT3 is for SLS protection. */ - jmp __ret + jmp zen_return_thunk int3 SYM_FUNC_END(zen_untrain_ret) __EXPORT_THUNK(zen_untrain_ret) @@ -194,12 +206,19 @@ SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_NOENDBR .byte 0x48, 0xb8 +/* + * This forces the function return instruction to speculate into a trap + * (UD2 in srso_return_thunk() below). This RET will then mispredict + * and execution will continue at the return site read from the top of + * the stack. + */ SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) add $8, %_ASM_SP ret int3 int3 int3 + /* end of movabs */ lfence call srso_safe_ret ud2 @@ -207,12 +226,19 @@ SYM_CODE_END(srso_safe_ret) SYM_FUNC_END(srso_untrain_ret) __EXPORT_THUNK(srso_untrain_ret) +SYM_CODE_START(srso_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_safe_ret + ud2 +SYM_CODE_END(srso_return_thunk) + SYM_CODE_START(__x86_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR - ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \ - "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS - ud2 + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index a60c5efe34b3..8d8bc07184ea 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -799,5 +799,5 @@ bool arch_is_rethunk(struct symbol *sym) return !strcmp(sym->name, "__x86_return_thunk") || !strcmp(sym->name, "srso_untrain_ret") || !strcmp(sym->name, "srso_safe_ret") || - !strcmp(sym->name, "__ret"); + !strcmp(sym->name, "zen_return_thunk"); } From 54dde78a50a8eb0d6a293c8416fcf6365d189af3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Aug 2023 13:44:32 +0200 Subject: [PATCH 05/16] x86/cpu: Rename original retbleed methods commit d025b7bac07a6e90b6b98b487f88854ad9247c39 upstream. Rename the original retbleed return thunk and untrain_ret to retbleed_return_thunk() and retbleed_untrain_ret(). No functional changes. Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814121148.909378169@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 8 ++++---- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kernel/vmlinux.lds.S | 2 +- arch/x86/lib/retpoline.S | 30 ++++++++++++++-------------- tools/objtool/arch/x86/decode.c | 2 +- tools/objtool/check.c | 2 +- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 71899b8a86e7..da8795d73836 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -168,7 +168,7 @@ .endm #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#define CALL_ZEN_UNTRAIN_RET "call retbleed_untrain_ret" #else #define CALL_ZEN_UNTRAIN_RET "" #endif @@ -178,7 +178,7 @@ * return thunk isn't mapped into the userspace tables (then again, AMD * typically has NO_MELTDOWN). * - * While zen_untrain_ret() doesn't clobber anything but requires stack, + * While retbleed_untrain_ret() doesn't clobber anything but requires stack, * entry_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point @@ -216,11 +216,11 @@ extern void __x86_return_thunk(void); static inline void __x86_return_thunk(void) {} #endif -extern void zen_return_thunk(void); +extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); -extern void zen_untrain_ret(void); +extern void retbleed_untrain_ret(void); extern void srso_untrain_ret(void); extern void srso_untrain_ret_alias(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 757c973531c5..07eebfa39f5b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1021,7 +1021,7 @@ do_cmd_auto: setup_force_cpu_cap(X86_FEATURE_UNRET); if (IS_ENABLED(CONFIG_RETHUNK)) - x86_return_thunk = zen_return_thunk; + x86_return_thunk = retbleed_return_thunk; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8af3ac61ee5f..72efa6d59b2f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -508,7 +508,7 @@ INIT_PER_CPU(irq_stack_backing_store); #endif #ifdef CONFIG_RETHUNK -. = ASSERT((zen_return_thunk & 0x3f) == 0, "zen_return_thunk not cacheline-aligned"); +. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index a08a53b31447..ff2196cbf0d6 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -131,32 +131,32 @@ SYM_CODE_END(srso_alias_return_thunk) /* * Safety details here pertain to the AMD Zen{1,2} microarchitecture: - * 1) The RET at zen_return_thunk must be on a 64 byte boundary, for + * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for * alignment within the BTB. - * 2) The instruction at zen_untrain_ret must contain, and not + * 2) The instruction at retbleed_untrain_ret must contain, and not * end with, the 0xc3 byte of the RET. * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread * from re-poisioning the BTB prediction. */ .align 64 - .skip 64 - (zen_return_thunk - zen_untrain_ret), 0xcc -SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc +SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_NOENDBR /* - * As executed from zen_untrain_ret, this is: + * As executed from retbleed_untrain_ret, this is: * * TEST $0xcc, %bl * LFENCE - * JMP zen_return_thunk + * JMP retbleed_return_thunk * * Executing the TEST instruction has a side effect of evicting any BTB * prediction (potentially attacker controlled) attached to the RET, as - * zen_return_thunk + 1 isn't an instruction boundary at the moment. + * retbleed_return_thunk + 1 isn't an instruction boundary at the moment. */ .byte 0xf6 /* - * As executed from zen_return_thunk, this is a plain RET. + * As executed from retbleed_return_thunk, this is a plain RET. * * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. * @@ -168,13 +168,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) * With SMT enabled and STIBP active, a sibling thread cannot poison * RET's prediction to a type of its choice, but can evict the * prediction due to competitive sharing. If the prediction is - * evicted, zen_return_thunk will suffer Straight Line Speculation + * evicted, retbleed_return_thunk will suffer Straight Line Speculation * which will be contained safely by the INT3. */ -SYM_INNER_LABEL(zen_return_thunk, SYM_L_GLOBAL) +SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL) ret int3 -SYM_CODE_END(zen_return_thunk) +SYM_CODE_END(retbleed_return_thunk) /* * Ensure the TEST decoding / BTB invalidation is complete. @@ -185,13 +185,13 @@ SYM_CODE_END(zen_return_thunk) * Jump back and execute the RET in the middle of the TEST instruction. * INT3 is for SLS protection. */ - jmp zen_return_thunk + jmp retbleed_return_thunk int3 -SYM_FUNC_END(zen_untrain_ret) -__EXPORT_THUNK(zen_untrain_ret) +SYM_FUNC_END(retbleed_untrain_ret) +__EXPORT_THUNK(retbleed_untrain_ret) /* - * SRSO untraining sequence for Zen1/2, similar to zen_untrain_ret() + * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() * above. On kernel entry, srso_untrain_ret() is executed which is a * * movabs $0xccccccc308c48348,%rax diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 8d8bc07184ea..29c35279c7ed 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -799,5 +799,5 @@ bool arch_is_rethunk(struct symbol *sym) return !strcmp(sym->name, "__x86_return_thunk") || !strcmp(sym->name, "srso_untrain_ret") || !strcmp(sym->name, "srso_safe_ret") || - !strcmp(sym->name, "zen_return_thunk"); + !strcmp(sym->name, "retbleed_return_thunk"); } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c2c350933a23..8ec97762b607 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1430,7 +1430,7 @@ static int add_jump_destinations(struct objtool_file *file) struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off); /* - * This is a special case for zen_untrain_ret(). + * This is a special case for retbleed_untrain_ret(). * It jumps to __x86_return_thunk(), but objtool * can't find the thunk's starting RET * instruction, because the RET is also in the From e6b40d2cb5aae35cc3659f9b74c999a01120ad30 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Aug 2023 13:44:33 +0200 Subject: [PATCH 06/16] x86/cpu: Rename srso_(.*)_alias to srso_alias_\1 commit 42be649dd1f2eee6b1fb185f1a231b9494cf095f upstream. For a more consistent namespace. [ bp: Fixup names in the doc too. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814121148.976236447@infradead.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/srso.rst | 4 ++-- arch/x86/include/asm/nospec-branch.h | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 8 +++---- arch/x86/lib/retpoline.S | 26 +++++++++++----------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst index 2f923c805802..f79cb11b080f 100644 --- a/Documentation/admin-guide/hw-vuln/srso.rst +++ b/Documentation/admin-guide/hw-vuln/srso.rst @@ -124,8 +124,8 @@ sequence. To ensure the safety of this mitigation, the kernel must ensure that the safe return sequence is itself free from attacker interference. In Zen3 and Zen4, this is accomplished by creating a BTB alias between the -untraining function srso_untrain_ret_alias() and the safe return -function srso_safe_ret_alias() which results in evicting a potentially +untraining function srso_alias_untrain_ret() and the safe return +function srso_alias_safe_ret() which results in evicting a potentially poisoned BTB entry and using that safe one for all function returns. In older Zen1 and Zen2, this is accomplished using a reinterpretation diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index da8795d73836..08262e6badc3 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -195,7 +195,7 @@ #ifdef CONFIG_CPU_SRSO ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \ - "call srso_untrain_ret_alias", X86_FEATURE_SRSO_ALIAS + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif .endm @@ -222,7 +222,7 @@ extern void srso_alias_return_thunk(void); extern void retbleed_untrain_ret(void); extern void srso_untrain_ret(void); -extern void srso_untrain_ret_alias(void); +extern void srso_alias_untrain_ret(void); extern void entry_ibpb(void); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 72efa6d59b2f..6b137c002778 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -141,10 +141,10 @@ SECTIONS #ifdef CONFIG_CPU_SRSO /* - * See the comment above srso_untrain_ret_alias()'s + * See the comment above srso_alias_untrain_ret()'s * definition. */ - . = srso_untrain_ret_alias | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20); + . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20); *(.text.__x86.rethunk_safe) #endif ALIGN_ENTRY_TEXT_END @@ -523,8 +523,8 @@ INIT_PER_CPU(irq_stack_backing_store); * Instead do: (A | B) - (A & B) in order to compute the XOR * of the two function addresses: */ -. = ASSERT(((ABSOLUTE(srso_untrain_ret_alias) | srso_safe_ret_alias) - - (ABSOLUTE(srso_untrain_ret_alias) & srso_safe_ret_alias)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)), +. = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) - + (ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)), "SRSO function pair won't alias"); #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index ff2196cbf0d6..4ba7739be027 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -76,56 +76,56 @@ SYM_CODE_END(__x86_indirect_thunk_array) #ifdef CONFIG_RETHUNK /* - * srso_untrain_ret_alias() and srso_safe_ret_alias() are placed at + * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at * special addresses: * - * - srso_untrain_ret_alias() is 2M aligned - * - srso_safe_ret_alias() is also in the same 2M page but bits 2, 8, 14 + * - srso_alias_untrain_ret() is 2M aligned + * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14 * and 20 in its virtual address are set (while those bits in the - * srso_untrain_ret_alias() function are cleared). + * srso_alias_untrain_ret() function are cleared). * * This guarantees that those two addresses will alias in the branch * target buffer of Zen3/4 generations, leading to any potential * poisoned entries at that BTB slot to get evicted. * - * As a result, srso_safe_ret_alias() becomes a safe return. + * As a result, srso_alias_safe_ret() becomes a safe return. */ #ifdef CONFIG_CPU_SRSO .section .text.__x86.rethunk_untrain -SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) +SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) UNWIND_HINT_FUNC ANNOTATE_NOENDBR ASM_NOP2 lfence jmp srso_alias_return_thunk -SYM_FUNC_END(srso_untrain_ret_alias) -__EXPORT_THUNK(srso_untrain_ret_alias) +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) .section .text.__x86.rethunk_safe #else /* dummy definition for alternatives */ -SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) +SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_UNRET_SAFE ret int3 -SYM_FUNC_END(srso_untrain_ret_alias) +SYM_FUNC_END(srso_alias_untrain_ret) #endif -SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE) +SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) add $8, %_ASM_SP UNWIND_HINT_FUNC ANNOTATE_UNRET_SAFE ret int3 -SYM_FUNC_END(srso_safe_ret_alias) +SYM_FUNC_END(srso_alias_safe_ret) .section .text.__x86.return_thunk SYM_CODE_START(srso_alias_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR - call srso_safe_ret_alias + call srso_alias_safe_ret ud2 SYM_CODE_END(srso_alias_return_thunk) From 529a9f087a7e03e87dd9c2acd687739e3e348045 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Aug 2023 13:44:34 +0200 Subject: [PATCH 07/16] x86/cpu: Cleanup the untrain mess commit e7c25c441e9e0fa75b4c83e0b26306b702cfe90d upstream. Since there can only be one active return_thunk, there only needs be one (matching) untrain_ret. It fundamentally doesn't make sense to allow multiple untrain_ret at the same time. Fold all the 3 different untrain methods into a single (temporary) helper stub. Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814121149.042774962@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 12 ++++-------- arch/x86/kernel/cpu/bugs.c | 1 + arch/x86/lib/retpoline.S | 7 +++++++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 08262e6badc3..2f123d4fb85b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -168,9 +168,9 @@ .endm #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_ZEN_UNTRAIN_RET "call retbleed_untrain_ret" +#define CALL_UNTRAIN_RET "call entry_untrain_ret" #else -#define CALL_ZEN_UNTRAIN_RET "" +#define CALL_UNTRAIN_RET "" #endif /* @@ -189,14 +189,9 @@ defined(CONFIG_CPU_SRSO) ANNOTATE_UNRET_END ALTERNATIVE_2 "", \ - CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif - -#ifdef CONFIG_CPU_SRSO - ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \ - "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS -#endif .endm #else /* __ASSEMBLY__ */ @@ -224,6 +219,7 @@ extern void retbleed_untrain_ret(void); extern void srso_untrain_ret(void); extern void srso_alias_untrain_ret(void); +extern void entry_untrain_ret(void); extern void entry_ibpb(void); #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 07eebfa39f5b..c2f8f98f20d5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2429,6 +2429,7 @@ static void __init srso_select_mitigation(void) * like ftrace, static_call, etc. */ setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); if (boot_cpu_data.x86 == 0x19) { setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 4ba7739be027..08a9faa220fe 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -233,6 +233,13 @@ SYM_CODE_START(srso_return_thunk) ud2 SYM_CODE_END(srso_return_thunk) +SYM_FUNC_START(entry_untrain_ret) + ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ + "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ + "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS +SYM_FUNC_END(entry_untrain_ret) +__EXPORT_THUNK(entry_untrain_ret) + SYM_CODE_START(__x86_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR From c16d0b3baff418ca09c4f6a7e04f0bfc472187bf Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 14 Aug 2023 21:29:50 +0200 Subject: [PATCH 08/16] x86/srso: Explain the untraining sequences a bit more commit 9dbd23e42ff0b10c9b02c9e649c76e5228241a8e upstream. The goal is to eventually have a proper documentation about all this. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230814164447.GFZNpZ/64H4lENIe94@fat_crate.local Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 08a9faa220fe..455a44aa22fb 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -129,6 +129,25 @@ SYM_CODE_START(srso_alias_return_thunk) ud2 SYM_CODE_END(srso_alias_return_thunk) +/* + * Some generic notes on the untraining sequences: + * + * They are interchangeable when it comes to flushing potentially wrong + * RET predictions from the BTB. + * + * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the + * Retbleed sequence because the return sequence done there + * (srso_safe_ret()) is longer and the return sequence must fully nest + * (end before) the untraining sequence. Therefore, the untraining + * sequence must fully overlap the return sequence. + * + * Regarding alignment - the instructions which need to be untrained, + * must all start at a cacheline boundary for Zen1/2 generations. That + * is, instruction sequences starting at srso_safe_ret() and + * the respective instruction sequences at retbleed_return_thunk() + * must start at a cacheline boundary. + */ + /* * Safety details here pertain to the AMD Zen{1,2} microarchitecture: * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for From c1f831425fe900fd18789733b66f4c987345296c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Aug 2023 12:44:19 +0200 Subject: [PATCH 09/16] x86/static_call: Fix __static_call_fixup() commit 54097309620ef0dc2d7083783dc521c6a5fef957 upstream. Christian reported spurious module load crashes after some of Song's module memory layout patches. Turns out that if the very last instruction on the very last page of the module is a 'JMP __x86_return_thunk' then __static_call_fixup() will trip a fault and die. And while the module rework made this slightly more likely to happen, it's always been possible. Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding") Reported-by: Christian Bricart Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20230816104419.GA982867@hirez.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/static_call.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index a9b54b795ebf..3fbb49168827 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -184,6 +184,19 @@ EXPORT_SYMBOL_GPL(arch_static_call_transform); */ bool __static_call_fixup(void *tramp, u8 op, void *dest) { + unsigned long addr = (unsigned long)tramp; + /* + * Not all .return_sites are a static_call trampoline (most are not). + * Check if the 3 bytes after the return are still kernel text, if not, + * then this definitely is not a trampoline and we need not worry + * further. + * + * This avoids the memcmp() below tripping over pagefaults etc.. + */ + if (((addr >> PAGE_SHIFT) != ((addr + 7) >> PAGE_SHIFT)) && + !kernel_text_address(addr + 7)) + return false; + if (memcmp(tramp+5, tramp_ud, 3)) { /* Not a trampoline site, not our problem. */ return false; From b41eb316c95c98d16dba2045c4890a3020f0a5bc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 11 Aug 2023 08:52:55 -0700 Subject: [PATCH 10/16] x86/retpoline: Don't clobber RFLAGS during srso_safe_ret() commit ba5ca5e5e6a1d55923e88b4a83da452166f5560e upstream. Use LEA instead of ADD when adjusting %rsp in srso_safe_ret{,_alias}() so as to avoid clobbering flags. Drop one of the INT3 instructions to account for the LEA consuming one more byte than the ADD. KVM's emulator makes indirect calls into a jump table of sorts, where the destination of each call is a small blob of code that performs fast emulation by executing the target instruction with fixed operands. E.g. to emulate ADC, fastop() invokes adcb_al_dl(): adcb_al_dl: <+0>: adc %dl,%al <+2>: jmp <__x86_return_thunk> A major motivation for doing fast emulation is to leverage the CPU to handle consumption and manipulation of arithmetic flags, i.e. RFLAGS is both an input and output to the target of the call. fastop() collects the RFLAGS result by pushing RFLAGS onto the stack and popping them back into a variable (held in %rdi in this case): asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" <+71>: mov 0xc0(%r8),%rdx <+78>: mov 0x100(%r8),%rcx <+85>: push %rdi <+86>: popf <+87>: call *%rsi <+89>: nop <+90>: nop <+91>: nop <+92>: pushf <+93>: pop %rdi and then propagating the arithmetic flags into the vCPU's emulator state: ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); <+64>: and $0xfffffffffffff72a,%r9 <+94>: and $0x8d5,%edi <+109>: or %rdi,%r9 <+122>: mov %r9,0x10(%r8) The failures can be most easily reproduced by running the "emulator" test in KVM-Unit-Tests. If you're feeling a bit of deja vu, see commit b63f20a778c8 ("x86/retpoline: Don't clobber RFLAGS during CALL_NOSPEC on i386"). In addition, this breaks booting of clang-compiled guest on a gcc-compiled host where the host contains the %rsp-modifying SRSO mitigations. [ bp: Massage commit message, extend, remove addresses. ] Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation") Closes: https://lore.kernel.org/all/de474347-122d-54cd-eabf-9dcc95ab9eae@amd.com Reported-by: Srikanth Aithal Reported-by: Nathan Chancellor Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov (AMD) Tested-by: Nathan Chancellor Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20230810013334.GA5354@dev-arch.thelio-3990X/ Link: https://lore.kernel.org/r/20230811155255.250835-1-seanjc@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/retpoline.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 455a44aa22fb..5a55ee2e80f0 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -113,7 +113,7 @@ SYM_FUNC_END(srso_alias_untrain_ret) #endif SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) - add $8, %_ASM_SP + lea 8(%_ASM_SP), %_ASM_SP UNWIND_HINT_FUNC ANNOTATE_UNRET_SAFE ret @@ -213,7 +213,7 @@ __EXPORT_THUNK(retbleed_untrain_ret) * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() * above. On kernel entry, srso_untrain_ret() is executed which is a * - * movabs $0xccccccc308c48348,%rax + * movabs $0xccccc30824648d48,%rax * * and when the return thunk executes the inner label srso_safe_ret() * later, it is a stack manipulation and a RET which is mispredicted and @@ -232,11 +232,10 @@ SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) * the stack. */ SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) - add $8, %_ASM_SP + lea 8(%_ASM_SP), %_ASM_SP ret int3 int3 - int3 /* end of movabs */ lfence call srso_safe_ret From e4679a0342e05a962639a6ec3781f257f417f0ff Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 11 Aug 2023 23:38:24 +0200 Subject: [PATCH 11/16] x86/CPU/AMD: Fix the DIV(0) initial fix attempt commit f58d6fbcb7c848b7f2469be339bc571f2e9d245b upstream. Initially, it was thought that doing an innocuous division in the #DE handler would take care to prevent any leaking of old data from the divider but by the time the fault is raised, the speculation has already advanced too far and such data could already have been used by younger operations. Therefore, do the innocuous division on every exit to userspace so that userspace doesn't see any potentially old data from integer divisions in kernel space. Do the same before VMRUN too, to protect host data from leaking into the guest too. Fixes: 77245f1c3c64 ("x86/CPU/AMD: Do not leak quotient data after a division by 0") Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20230811213824.10025-1-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/entry-common.h | 1 + arch/x86/kernel/cpu/amd.c | 1 + arch/x86/kernel/traps.c | 2 -- arch/x86/kvm/svm/svm.c | 2 ++ 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 674ed46d3ced..11203a9fe0a8 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -92,6 +92,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { mds_user_clear_cpu_buffers(); + amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 239b302973d7..f240c978d85e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1295,3 +1295,4 @@ void noinstr amd_clear_divider(void) asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0) :: "a" (0), "d" (0), "r" (1)); } +EXPORT_SYMBOL_GPL(amd_clear_divider); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e8795d8b0f1..c0a5a4f225d9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -206,8 +206,6 @@ DEFINE_IDTENTRY(exc_divide_error) { do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE, FPE_INTDIV, error_get_trap_addr(regs)); - - amd_clear_divider(); } DEFINE_IDTENTRY(exc_overflow) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index fdb6007f2eb8..a96f9a17e8b5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3947,6 +3947,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in guest_state_enter_irqoff(); + amd_clear_divider(); + if (sev_es_guest(vcpu->kvm)) __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); else From dae93ed961a8a315cefe64491e8573c5d9e3484e Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sun, 13 Aug 2023 12:39:34 +0200 Subject: [PATCH 12/16] x86/srso: Disable the mitigation on unaffected configurations commit e9fbc47b818b964ddff5df5b2d5c0f5f32f4a147 upstream. Skip the srso cmd line parsing which is not needed on Zen1/2 with SMT disabled and with the proper microcode applied (latter should be the case anyway) as those are not affected. Fixes: 5a15d8348881 ("x86/srso: Tie SBPB bit setting to microcode patch detection") Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230813104517.3346-1-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c2f8f98f20d5..971f28944037 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2399,8 +2399,10 @@ static void __init srso_select_mitigation(void) * IBPB microcode has been applied. */ if ((boot_cpu_data.x86 < 0x19) && - (!cpu_smt_possible() || (cpu_smt_control == CPU_SMT_DISABLED))) + (!cpu_smt_possible() || (cpu_smt_control == CPU_SMT_DISABLED))) { setup_force_cpu_cap(X86_FEATURE_SRSO_NO); + return; + } } if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { @@ -2686,6 +2688,9 @@ static ssize_t gds_show_state(char *buf) static ssize_t srso_show_state(char *buf) { + if (boot_cpu_has(X86_FEATURE_SRSO_NO)) + return sysfs_emit(buf, "Not affected\n"); + return sysfs_emit(buf, "%s%s\n", srso_strings[srso_mitigation], (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode")); From c8b056a3b4ebb33adbb873cab152ed499d1a1dcb Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 11 Jul 2023 11:19:51 +0200 Subject: [PATCH 13/16] x86/retpoline,kprobes: Fix position of thunk sections with CONFIG_LTO_CLANG commit 79cd2a11224eab86d6673fe8a11d2046ae9d2757 upstream. The linker script arch/x86/kernel/vmlinux.lds.S matches the thunk sections ".text.__x86.*" from arch/x86/lib/retpoline.S as follows: .text { [...] TEXT_TEXT [...] __indirect_thunk_start = .; *(.text.__x86.*) __indirect_thunk_end = .; [...] } Macro TEXT_TEXT references TEXT_MAIN which normally expands to only ".text". However, with CONFIG_LTO_CLANG, TEXT_MAIN becomes ".text .text.[0-9a-zA-Z_]*" which wrongly matches also the thunk sections. The output layout is then different than expected. For instance, the currently defined range [__indirect_thunk_start, __indirect_thunk_end] becomes empty. Prevent the problem by using ".." as the first separator, for example, ".text..__x86.indirect_thunk". This pattern is utilized by other explicit section names which start with one of the standard prefixes, such as ".text" or ".data", and that need to be individually selected in the linker script. [ nathan: Fix conflicts with SRSO and fold in fix issue brought up by Andrew Cooper in post-review: https://lore.kernel.org/20230803230323.1478869-1-andrew.cooper3@citrix.com ] Fixes: dc5723b02e52 ("kbuild: add support for Clang LTO") Signed-off-by: Petr Pavlu Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Nathan Chancellor Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230711091952.27944-2-petr.pavlu@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/vmlinux.lds.S | 8 ++++---- arch/x86/lib/retpoline.S | 8 ++++---- tools/objtool/check.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 6b137c002778..78ccb5ec3c0e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -134,7 +134,7 @@ SECTIONS KPROBES_TEXT ALIGN_ENTRY_TEXT_BEGIN #ifdef CONFIG_CPU_SRSO - *(.text.__x86.rethunk_untrain) + *(.text..__x86.rethunk_untrain) #endif ENTRY_TEXT @@ -145,7 +145,7 @@ SECTIONS * definition. */ . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20); - *(.text.__x86.rethunk_safe) + *(.text..__x86.rethunk_safe) #endif ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT @@ -154,8 +154,8 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.indirect_thunk) - *(.text.__x86.return_thunk) + *(.text..__x86.indirect_thunk) + *(.text..__x86.return_thunk) __indirect_thunk_end = .; #endif } :text =0xcccc diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 5a55ee2e80f0..65c5c44f006b 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -11,7 +11,7 @@ #include #include - .section .text.__x86.indirect_thunk + .section .text..__x86.indirect_thunk .macro RETPOLINE reg ANNOTATE_INTRA_FUNCTION_CALL @@ -91,7 +91,7 @@ SYM_CODE_END(__x86_indirect_thunk_array) * As a result, srso_alias_safe_ret() becomes a safe return. */ #ifdef CONFIG_CPU_SRSO - .section .text.__x86.rethunk_untrain + .section .text..__x86.rethunk_untrain SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) UNWIND_HINT_FUNC @@ -102,7 +102,7 @@ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) SYM_FUNC_END(srso_alias_untrain_ret) __EXPORT_THUNK(srso_alias_untrain_ret) - .section .text.__x86.rethunk_safe + .section .text..__x86.rethunk_safe #else /* dummy definition for alternatives */ SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) @@ -120,7 +120,7 @@ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) int3 SYM_FUNC_END(srso_alias_safe_ret) - .section .text.__x86.return_thunk + .section .text..__x86.return_thunk SYM_CODE_START(srso_alias_return_thunk) UNWIND_HINT_FUNC diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8ec97762b607..7c4e61cbefaa 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -379,7 +379,7 @@ static int decode_instructions(struct objtool_file *file) if (!strcmp(sec->name, ".noinstr.text") || !strcmp(sec->name, ".entry.text") || - !strncmp(sec->name, ".text.__x86.", 12)) + !strncmp(sec->name, ".text..__x86.", 13)) sec->noinstr = true; for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { From 4da4aae04b7f3d5ba4345a75f92de00a54644f11 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Aug 2023 13:59:21 +0200 Subject: [PATCH 14/16] objtool/x86: Fixup frame-pointer vs rethunk commit dbf46008775516f7f25c95b7760041c286299783 upstream. For stack-validation of a frame-pointer build, objtool validates that every CALL instruction is preceded by a frame-setup. The new SRSO return thunks violate this with their RSB stuffing trickery. Extend the __fentry__ exception to also cover the embedded_insn case used for this. This cures: vmlinux.o: warning: objtool: srso_untrain_ret+0xd: call without frame pointer save/setup Fixes: 4ae68b26c3ab ("objtool/x86: Fix SRSO mess") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20230816115921.GH980931@hirez.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 7c4e61cbefaa..913bd361c368 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2450,12 +2450,17 @@ static int decode_sections(struct objtool_file *file) return 0; } -static bool is_fentry_call(struct instruction *insn) +static bool is_special_call(struct instruction *insn) { - if (insn->type == INSN_CALL && - insn->call_dest && - insn->call_dest->fentry) - return true; + if (insn->type == INSN_CALL) { + struct symbol *dest = insn->call_dest; + + if (!dest) + return false; + + if (dest->fentry) + return true; + } return false; } @@ -3448,7 +3453,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (ret) return ret; - if (opts.stackval && func && !is_fentry_call(insn) && + if (opts.stackval && func && !is_special_call(insn) && !has_valid_stack_frame(&state)) { WARN_FUNC("call without frame pointer save/setup", sec, insn->offset); From 7487244912b13a918812b8f830e6a0dde269fdeb Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 15 Aug 2023 11:53:13 +0200 Subject: [PATCH 15/16] x86/srso: Correct the mitigation status when SMT is disabled commit 6405b72e8d17bd1875a56ae52d23ec3cd51b9d66 upstream. Specify how is SRSO mitigated when SMT is disabled. Also, correct the SMT check for that. Fixes: e9fbc47b818b ("x86/srso: Disable the mitigation on unaffected configurations") Suggested-by: Josh Poimboeuf Signed-off-by: Borislav Petkov (AMD) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20230814200813.p5czl47zssuej7nv@treble Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 971f28944037..3a893ab398a0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2398,8 +2398,7 @@ static void __init srso_select_mitigation(void) * Zen1/2 with SMT off aren't vulnerable after the right * IBPB microcode has been applied. */ - if ((boot_cpu_data.x86 < 0x19) && - (!cpu_smt_possible() || (cpu_smt_control == CPU_SMT_DISABLED))) { + if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) { setup_force_cpu_cap(X86_FEATURE_SRSO_NO); return; } @@ -2689,7 +2688,7 @@ static ssize_t gds_show_state(char *buf) static ssize_t srso_show_state(char *buf) { if (boot_cpu_has(X86_FEATURE_SRSO_NO)) - return sysfs_emit(buf, "Not affected\n"); + return sysfs_emit(buf, "Mitigation: SMT disabled\n"); return sysfs_emit(buf, "%s%s\n", srso_strings[srso_mitigation], From cd363bb9548ec3208120bb3f55ff4ded2487d7fb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 Aug 2023 13:27:01 +0200 Subject: [PATCH 16/16] Linux 6.1.48 Link: https://lore.kernel.org/r/20230824141447.155846739@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: SeongJae Park Tested-by: Joel Fernandes (Google) Tested-by: Sudip Mukherjee Tested-by: Linux Kernel Functional Testing Tested-by: Jon Hunter Tested-by: Conor Dooley Tested-by: Takeshi Ogasawara Tested-by: Shuah Khan Tested-by: Bagas Sanjaya Tested-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 375efcfb91f8..8bb8dd199c55 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 47 +SUBLEVEL = 48 EXTRAVERSION = NAME = Curry Ramen