From 125a6a65b9feb47a561a7ee98bf8ba91d82e6e2e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 6 Mar 2018 15:21:41 +0100 Subject: [PATCH 01/77] x86/MCE: Save microcode revision in machine check records commit fa94d0c6e0f3431523f5701084d799c77c7d4a4f upstream. Updating microcode used to be relatively rare. Now that it has become more common we should save the microcode version in a machine check record to make sure that those people looking at the error have this important information bundled with the rest of the logged information. [ Borislav: Simplify a bit. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Yazen Ghannam Cc: linux-edac Link: http://lkml.kernel.org/r/20180301233449.24311-1-tony.luck@intel.com [bwh: Backported to 4.9: - Also add ppin field to struct mce, to match upstream UAPI - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/mce.h | 2 ++ arch/x86/kernel/cpu/mcheck/mce.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 69a6e07e3149..db7dae58745f 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -28,6 +28,8 @@ struct mce { __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + __u64 ppin; /* Protected Processor Inventory Number */ + __u32 microcode;/* Microcode revision */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 25310d2b8609..d9ad49ca3cbe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -139,6 +139,8 @@ void mce_setup(struct mce *m) m->socketid = cpu_data(m->extcpu).phys_proc_id; m->apicid = cpu_data(m->extcpu).initial_apicid; rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); + + m->microcode = boot_cpu_data.microcode; } DEFINE_PER_CPU(struct mce, injectm); @@ -309,7 +311,7 @@ static void print_mce(struct mce *m) */ pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, - cpu_data(m->extcpu).microcode); + m->microcode); pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); } From 7a473303c9e176ccb2e5025a69797944f7d355a5 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 7 Nov 2018 17:09:42 +0000 Subject: [PATCH 02/77] x86/cpufeatures: Hide AMD-specific speculation flags Hide the AMD_{IBRS,IBPB,STIBP} flag from /proc/cpuinfo. This was done upstream as part of commit e7c587da1252 "x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP". That commit has already been backported but this part was omitted. Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 98444b77fbe3..e493a5836ff1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -271,9 +271,9 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ From 98ccdae863f37056ad43681e3d2410790447973b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 1 Jun 2018 10:59:19 -0400 Subject: [PATCH 03/77] x86/bugs: Add AMD's variant of SSB_NO commit 24809860012e0130fbafe536709e08a22b3e959e upstream. The AMD document outlining the SSBD handling 124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf mentions that the CPUID 8000_0008.EBX[26] will mean that the speculative store bypass disable is no longer needed. A copy of this document is available at: https://bugzilla.kernel.org/show_bug.cgi?id=199889 Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Janakarajan Natarajan Cc: kvm@vger.kernel.org Cc: andrew.cooper3@citrix.com Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180601145921.9500-2-konrad.wilk@oracle.com [bwh: Backported to 4.9: adjust context, indentation] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kvm/cpuid.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index e493a5836ff1..453ab6f3bca0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -275,6 +275,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3c01610c5ba9..9b8e912fd840 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -950,7 +950,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO)) + !(ia32_cap & ARCH_CAP_SSB_NO) && + !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_speculation)) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c17d3893ae60..f1f1a128bbdb 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -355,7 +355,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD) | F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = From 9ad055877c939e4d8661ede9e2aa1cc3691cef89 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 1 Jun 2018 10:59:20 -0400 Subject: [PATCH 04/77] x86/bugs: Add AMD's SPEC_CTRL MSR usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6ac2f49edb1ef5446089c7c660017732886d62d6 upstream. The AMD document outlining the SSBD handling 124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf mentions that if CPUID 8000_0008.EBX[24] is set we should be using the SPEC_CTRL MSR (0x48) over the VIRT SPEC_CTRL MSR (0xC001_011f) for speculative store bypass disable. This in effect means we should clear the X86_FEATURE_VIRT_SSBD flag so that we would prefer the SPEC_CTRL MSR. See the document titled: 124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=199889 Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Janakarajan Natarajan Cc: kvm@vger.kernel.org Cc: KarimAllah Ahmed Cc: andrew.cooper3@citrix.com Cc: Joerg Roedel Cc: Radim Krčmář Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Paolo Bonzini Cc: Borislav Petkov Cc: David Woodhouse Cc: Kees Cook Link: https://lkml.kernel.org/r/20180601145921.9500-3-konrad.wilk@oracle.com [bwh: Backported to 4.9: - Update feature test in guest_cpuid_has_spec_ctrl() instead of svm_{get,set}_msr() - Adjust context, indentation] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 12 +++++++----- arch/x86/kernel/cpu/common.c | 6 ++++++ arch/x86/kvm/cpuid.c | 10 ++++++++-- arch/x86/kvm/cpuid.h | 2 +- arch/x86/kvm/svm.c | 2 +- 6 files changed, 24 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 453ab6f3bca0..234d74186046 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -274,6 +274,7 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6221166e3fca..b1146405ce8a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -531,18 +531,20 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) if (mode == SPEC_STORE_BYPASS_DISABLE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); /* - * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses - * a completely different MSR and bit dependent on family. + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may + * use a completely different MSR and bit dependent on family. */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: + case X86_VENDOR_AMD: + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + x86_amd_ssb_disable(); + break; + } x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); break; - case X86_VENDOR_AMD: - x86_amd_ssb_disable(); - break; } } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9b8e912fd840..59b2dc011f7f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -752,6 +752,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_STIBP); set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } + + if (cpu_has(c, X86_FEATURE_AMD_SSBD)) { + set_cpu_cap(c, X86_FEATURE_SSBD); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD); + } } void get_cpu_cap(struct cpuinfo_x86 *c) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f1f1a128bbdb..b6435f3be254 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -355,7 +355,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD) | F(AMD_SSB_NO); + F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | + F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -633,7 +634,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + /* + * The preference is to use SPEC CTRL MSR instead of the + * VIRT_SPEC MSR. + */ + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->ebx |= F(VIRT_SSBD); break; } diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 8a841b9d8f84..b2bf8e1d5782 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -176,7 +176,7 @@ static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *best; best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); - if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS))) + if (best && (best->ebx & (bit(X86_FEATURE_AMD_IBRS | bit(X86_FEATURE_AMD_SSBD))))) return true; best = kvm_find_cpuid_entry(vcpu, 7, 0); return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SPEC_CTRL_SSBD))); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9a6d258c3c16..9338136a6a23 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3704,7 +3704,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; svm->spec_ctrl = data; From c2185a44e742c82a6975ff1c96f8e95053658ca8 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 1 Jun 2018 10:59:21 -0400 Subject: [PATCH 05/77] x86/bugs: Switch the selection of mitigation from CPU vendor to CPU features commit 108fab4b5c8f12064ef86e02cb0459992affb30f upstream. Both AMD and Intel can have SPEC_CTRL_MSR for SSBD. However AMD also has two more other ways of doing it - which are !SPEC_CTRL MSR ways. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Cc: Kees Cook Cc: kvm@vger.kernel.org Cc: KarimAllah Ahmed Cc: andrew.cooper3@citrix.com Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180601145921.9500-4-konrad.wilk@oracle.com Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b1146405ce8a..878832580f7f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -534,17 +534,12 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may * use a completely different MSR and bit dependent on family. */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - case X86_VENDOR_AMD: - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - x86_amd_ssb_disable(); - break; - } + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + x86_amd_ssb_disable(); + else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - break; } } From a7501dca303c95e15a1ecc8830729b264081ca1a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:36:02 -0500 Subject: [PATCH 06/77] x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR commit 612bc3b3d4be749f73a513a17d9b3ee1330d3487 upstream. On AMD, the presence of the MSR_SPEC_CTRL feature does not imply that the SSBD mitigation support should use the SPEC_CTRL MSR. Other features could have caused the MSR_SPEC_CTRL feature to be set, while a different SSBD mitigation option is in place. Update the SSBD support to check for the actual SSBD features that will use the SPEC_CTRL MSR. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 6ac2f49edb1e ("x86/bugs: Add AMD's SPEC_CTRL MSR usage") Link: http://lkml.kernel.org/r/20180702213602.29202.33151.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 878832580f7f..b6438dfba461 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -165,7 +165,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); if (hostval != guestval) { @@ -534,9 +535,10 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may * use a completely different MSR and bit dependent on family. */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && + !static_cpu_has(X86_FEATURE_AMD_SSBD)) { x86_amd_ssb_disable(); - else { + } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); From 2678bc5cef4007463d75ff4ddc35b86aa3fb04ed Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 9 Jan 2017 12:41:45 +0100 Subject: [PATCH 07/77] x86/microcode/intel: Add a helper which gives the microcode revision commit 4167709bbf826512a52ebd6aafda2be104adaec9 upstream. Since on Intel we're required to do CPUID(1) first, before reading the microcode revision MSR, let's add a special helper which does the required steps so that we don't forget to do them next time, when we want to read the microcode revision. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170109114147.5082-4-bp@alien8.de Signed-off-by: Thomas Gleixner [bwh: Backported to 4.9: - Keep using sync_core(), which will alway includes the necessary CPUID - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/microcode_intel.h | 15 +++++++++ arch/x86/kernel/cpu/intel.c | 11 ++----- arch/x86/kernel/cpu/microcode/intel.c | 43 ++++++++------------------ 3 files changed, 31 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 5e69154c9f07..a61ec81b27db 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -52,6 +52,21 @@ struct extended_sigtable { #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) +static inline u32 intel_get_microcode_revision(void) +{ + u32 rev, dummy; + + native_wrmsrl(MSR_IA32_UCODE_REV, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); + + return rev; +} + extern int has_newer_microcode(void *mc, unsigned int csig, int cpf, int rev); extern int microcode_sanity_check(void *mc, int print_err); extern int find_matching_signature(void *mc, unsigned int csig, int cpf); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cee0fec0d232..860f2fd9f540 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -137,14 +138,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) { - unsigned lower_word; - - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* Required by the SDM */ - sync_core(); - rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); - } + if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) + c->microcode = intel_get_microcode_revision(); /* Now if any of them are set, check the blacklist and clear the lot */ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 79291d6fb301..af8fc3bc706b 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -386,15 +386,8 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); csig.pf = 1 << ((val[1] >> 18) & 7); } - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - csig.rev = val[1]; + csig.rev = intel_get_microcode_revision(); uci->cpu_sig = csig; uci->valid = 1; @@ -618,7 +611,7 @@ static inline void print_ucode(struct ucode_cpu_info *uci) static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) { struct microcode_intel *mc; - unsigned int val[2]; + u32 rev; mc = uci->mc; if (!mc) @@ -626,21 +619,16 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (val[1] != mc->hdr.rev) + rev = intel_get_microcode_revision(); + if (rev != mc->hdr.rev) return -1; #ifdef CONFIG_X86_64 /* Flush global tlb. This is precaution. */ flush_tlb_early(); #endif - uci->cpu_sig.rev = val[1]; + uci->cpu_sig.rev = rev; if (early) print_ucode(uci); @@ -904,8 +892,8 @@ static int apply_microcode_intel(int cpu) struct microcode_intel *mc; struct ucode_cpu_info *uci; struct cpuinfo_x86 *c; - unsigned int val[2]; static int prev_rev; + u32 rev; /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) @@ -926,33 +914,28 @@ static int apply_microcode_intel(int cpu) /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); - wrmsrl(MSR_IA32_UCODE_REV, 0); - /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); + rev = intel_get_microcode_revision(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - if (val[1] != mc->hdr.rev) { + if (rev != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", cpu, mc->hdr.rev); return -1; } - if (val[1] != prev_rev) { + if (rev != prev_rev) { pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n", - val[1], + rev, mc->hdr.date & 0xffff, mc->hdr.date >> 24, (mc->hdr.date >> 16) & 0xff); - prev_rev = val[1]; + prev_rev = rev; } c = &cpu_data(cpu); - uci->cpu_sig.rev = val[1]; - c->microcode = val[1]; + uci->cpu_sig.rev = rev; + c->microcode = rev; return 0; } From 97d70759908b6a94f3679ce38f2ec3e4da9f3a22 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Wed, 28 Feb 2018 11:28:41 +0100 Subject: [PATCH 08/77] x86/microcode/intel: Check microcode revision before updating sibling threads commit c182d2b7d0ca48e0d6ff16f7d883161238c447ed upstream. After updating microcode on one of the threads of a core, the other thread sibling automatically gets the update since the microcode resources on a hyperthreaded core are shared between the two threads. Check the microcode revision on the CPU before performing a microcode update and thus save us the WRMSR 0x79 because it is a particularly expensive operation. [ Borislav: Massage changelog and coding style. ] Signed-off-by: Ashok Raj Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Tom Lendacky Tested-by: Ashok Raj Cc: Arjan Van De Ven Link: http://lkml.kernel.org/r/1519352533-15992-2-git-send-email-ashok.raj@intel.com Link: https://lkml.kernel.org/r/20180228102846.13447-3-bp@alien8.de [bwh: Backported to 4.9: return 0 in this case] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/intel.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index af8fc3bc706b..8faa5fa208d6 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -617,6 +617,17 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (!mc) return 0; + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. + */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + return 0; + } + /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -891,7 +902,7 @@ static int apply_microcode_intel(int cpu) { struct microcode_intel *mc; struct ucode_cpu_info *uci; - struct cpuinfo_x86 *c; + struct cpuinfo_x86 *c = &cpu_data(cpu); static int prev_rev; u32 rev; @@ -912,6 +923,18 @@ static int apply_microcode_intel(int cpu) if (!get_matching_mc(mc, cpu)) return 0; + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. + */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + c->microcode = rev; + return 0; + } + /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -932,8 +955,6 @@ static int apply_microcode_intel(int cpu) prev_rev = rev; } - c = &cpu_data(cpu); - uci->cpu_sig.rev = rev; c->microcode = rev; From 9e99161b71447331007b1bef13be067353e9ff2b Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 31 Jul 2018 07:27:39 -0400 Subject: [PATCH 09/77] x86/microcode: Make sure boot_cpu_data.microcode is up-to-date commit 370a132bb2227ff76278f98370e0e701d86ff752 upstream. When preparing an MCE record for logging, boot_cpu_data.microcode is used to read out the microcode revision on the box. However, on systems where late microcode update has happened, the microcode revision output in a MCE log record is wrong because boot_cpu_data.microcode is not updated when the microcode gets updated. But, the microcode revision saved in boot_cpu_data's microcode member should be kept up-to-date, regardless, for consistency. Make it so. Fixes: fa94d0c6e0f3 ("x86/MCE: Save microcode revision in machine check records") Signed-off-by: Prarit Bhargava Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tony Luck Cc: sironi@amazon.de Link: http://lkml.kernel.org/r/20180731112739.32338-1-prarit@redhat.com [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/amd.c | 4 ++++ arch/x86/kernel/cpu/microcode/intel.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 732bb03fcf91..fdb750e76223 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -724,6 +724,10 @@ int apply_microcode_amd(int cpu) uci->cpu_sig.rev = mc_amd->hdr.patch_id; c->microcode = mc_amd->hdr.patch_id; + /* Update boot_cpu_data's revision too, if we're on the BSP: */ + if (c->cpu_index == boot_cpu_data.cpu_index) + boot_cpu_data.microcode = mc_amd->hdr.patch_id; + return 0; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 8faa5fa208d6..4748488bfaaf 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -958,6 +958,10 @@ static int apply_microcode_intel(int cpu) uci->cpu_sig.rev = rev; c->microcode = rev; + /* Update boot_cpu_data's revision too, if we're on the BSP: */ + if (c->cpu_index == boot_cpu_data.cpu_index) + boot_cpu_data.microcode = rev; + return 0; } From e160f1dea94efbad525d169df5379402c8c5ad05 Mon Sep 17 00:00:00 2001 From: Filippo Sironi Date: Tue, 31 Jul 2018 17:29:30 +0200 Subject: [PATCH 10/77] x86/microcode: Update the new microcode revision unconditionally commit 8da38ebaad23fe1b0c4a205438676f6356607cfc upstream. Handle the case where microcode gets loaded on the BSP's hyperthread sibling first and the boot_cpu_data's microcode revision doesn't get updated because of early exit due to the siblings sharing a microcode engine. For that, simply write the updated revision on all CPUs unconditionally. Signed-off-by: Filippo Sironi Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: prarit@redhat.com Link: http://lkml.kernel.org/r/1533050970-14385-1-git-send-email-sironi@amazon.de [bwh: Backported to 4.9: - Keep returning 0 on success - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/amd.c | 20 ++++++++++---------- arch/x86/kernel/cpu/microcode/intel.c | 10 ++++------ 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index fdb750e76223..a19fddfb6bf8 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -707,26 +707,26 @@ int apply_microcode_amd(int cpu) return -1; /* need to apply patch? */ - if (rev >= mc_amd->hdr.patch_id) { - c->microcode = rev; - uci->cpu_sig.rev = rev; - return 0; - } + if (rev >= mc_amd->hdr.patch_id) + goto out; if (__apply_microcode_amd(mc_amd)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); return -1; } - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, - mc_amd->hdr.patch_id); - uci->cpu_sig.rev = mc_amd->hdr.patch_id; - c->microcode = mc_amd->hdr.patch_id; + rev = mc_amd->hdr.patch_id; + + pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); + +out: + uci->cpu_sig.rev = rev; + c->microcode = rev; /* Update boot_cpu_data's revision too, if we're on the BSP: */ if (c->cpu_index == boot_cpu_data.cpu_index) - boot_cpu_data.microcode = mc_amd->hdr.patch_id; + boot_cpu_data.microcode = rev; return 0; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 4748488bfaaf..1308abfc4758 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -929,11 +929,8 @@ static int apply_microcode_intel(int cpu) * already. */ rev = intel_get_microcode_revision(); - if (rev >= mc->hdr.rev) { - uci->cpu_sig.rev = rev; - c->microcode = rev; - return 0; - } + if (rev >= mc->hdr.rev) + goto out; /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -955,8 +952,9 @@ static int apply_microcode_intel(int cpu) prev_rev = rev; } +out: uci->cpu_sig.rev = rev; - c->microcode = rev; + c->microcode = rev; /* Update boot_cpu_data's revision too, if we're on the BSP: */ if (c->cpu_index == boot_cpu_data.cpu_index) From 08e501b5ff9f67f592b33d5e76e28d539a490041 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 2 Sep 2018 11:14:50 -0700 Subject: [PATCH 11/77] x86/mm: Use WRITE_ONCE() when setting PTEs commit 9bc4f28af75a91aea0ae383f50b0a430c4509303 upstream. When page-table entries are set, the compiler might optimize their assignment by using multiple instructions to set the PTE. This might turn into a security hazard if the user somehow manages to use the interim PTE. L1TF does not make our lives easier, making even an interim non-present PTE a security hazard. Using WRITE_ONCE() to set PTEs and friends should prevent this potential security hazard. I skimmed the differences in the binary with and without this patch. The differences are (obviously) greater when CONFIG_PARAVIRT=n as more code optimizations are possible. For better and worse, the impact on the binary with this patch is pretty small. Skimming the code did not cause anything to jump out as a security hazard, but it seems that at least move_soft_dirty_pte() caused set_pte_at() to use multiple writes. Signed-off-by: Nadav Amit Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Dave Hansen Cc: Andi Kleen Cc: Josh Poimboeuf Cc: Michal Hocko Cc: Vlastimil Babka Cc: Sean Christopherson Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180902181451.80520-1-namit@vmware.com [bwh: Backported to 4.9: - Drop changes in pmdp_establish(), native_set_p4d(), pudp_set_access_flags() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/pgtable_64.h | 18 +++++++++--------- arch/x86/mm/pgtable.c | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 221a32ed1372..f12e61e2a86b 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -44,15 +44,15 @@ struct mm_struct; void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); +static inline void native_set_pte(pte_t *ptep, pte_t pte) +{ + WRITE_ONCE(*ptep, pte); +} + static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - *ptep = native_make_pte(0); -} - -static inline void native_set_pte(pte_t *ptep, pte_t pte) -{ - *ptep = pte; + native_set_pte(ptep, native_make_pte(0)); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) @@ -62,7 +62,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); } static inline void native_pmd_clear(pmd_t *pmd) @@ -98,7 +98,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) static inline void native_set_pud(pud_t *pudp, pud_t pud) { - *pudp = pud; + WRITE_ONCE(*pudp, pud); } static inline void native_pud_clear(pud_t *pud) @@ -131,7 +131,7 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { - *pgdp = kaiser_set_shadow_pgd(pgdp, pgd); + WRITE_ONCE(*pgdp, kaiser_set_shadow_pgd(pgdp, pgd)); } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e30baa8ad94f..dff8ac2d255c 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -251,7 +251,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) if (pgd_val(pgd) != 0) { pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); - pgdp[i] = native_make_pgd(0); + pgd_clear(&pgdp[i]); paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); pmd_free(mm, pmd); @@ -419,7 +419,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, int changed = !pte_same(*ptep, entry); if (changed && dirty) { - *ptep = entry; + set_pte(ptep, entry); pte_update(vma->vm_mm, address, ptep); } @@ -436,7 +436,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed && dirty) { - *pmdp = entry; + set_pmd(pmdp, entry); /* * We had a write-protection fault here and changed the pmd * to to more permissive. No need to flush the TLB for that, From c6693781ddaf21dd3746bd74ba0c66e013782b06 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 8 Sep 2017 16:14:33 -0700 Subject: [PATCH 12/77] bitops: avoid integer overflow in GENMASK(_ULL) commit c32ee3d9abd284b4fcaacc250b101f93829c7bae upstream. GENMASK(_ULL) performs a left-shift of ~0UL(L), which technically results in an integer overflow. clang raises a warning if the overflow occurs in a preprocessor expression. Clear the low-order bits through a substraction instead of the left-shift to avoid the overflow. (akpm: no change in .text size in my testing) Link: http://lkml.kernel.org/r/20170803212020.24939-1-mka@chromium.org Signed-off-by: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- include/linux/bitops.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a83c822c35c2..8fbe259b197c 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -19,10 +19,11 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #define GENMASK(h, l) \ - (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) #define GENMASK_ULL(h, l) \ - (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + (((~0ULL) - (1ULL << (l)) + 1) & \ + (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); From ef0efbb7a99a1e51a0391f3fc51f7a3d505c179e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 22 May 2018 11:05:39 +0200 Subject: [PATCH 13/77] x86/speculation: Simplify the CPU bug detection logic commit 8ecc4979b1bd9c94168e6fc92960033b7a951336 upstream. Only CPUs which speculate can speculate. Therefore, it seems prudent to test for cpu_no_speculation first and only then determine whether a specific speculating CPU is susceptible to store bypass speculation. This is underlined by all CPUs currently listed in cpu_no_speculation were present in cpu_no_spec_store_bypass as well. Signed-off-by: Dominik Brodowski Signed-off-by: Thomas Gleixner Cc: bp@suse.de Cc: konrad.wilk@oracle.com Link: https://lkml.kernel.org/r/20180522090539.GA24668@light.dominikbrodowski.net Signed-off-by: Thomas Gleixner Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 59b2dc011f7f..8ddcf026a77d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -909,12 +909,8 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; +/* Only list CPUs which speculate but are non susceptible to SSB */ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, @@ -922,14 +918,10 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - { X86_VENDOR_CENTAUR, 5, }, - { X86_VENDOR_INTEL, 5, }, - { X86_VENDOR_NSC, 5, }, { X86_VENDOR_AMD, 0x12, }, { X86_VENDOR_AMD, 0x11, }, { X86_VENDOR_AMD, 0x10, }, { X86_VENDOR_AMD, 0xf, }, - { X86_VENDOR_ANY, 4, }, {} }; @@ -952,6 +944,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; + if (x86_match_cpu(cpu_no_speculation)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); @@ -960,12 +958,6 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); - if (x86_match_cpu(cpu_no_speculation)) - return; - - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); From b995196b9da4e2486d50e132539c848a60ea88da Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jun 2018 13:53:08 +0100 Subject: [PATCH 14/77] locking/atomics, asm-generic: Move some macros from to a new file commit 8bd9cb51daac89337295b6f037b0486911e1b408 upstream. In preparation for implementing the asm-generic atomic bitops in terms of atomic_long_*(), we need to prevent implementations from pulling in . A common reason for this include is for the BITS_PER_BYTE definition, so move this and some other BIT() and masking macros into a new header file, . Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: yamada.masahiro@socionext.com Link: https://lore.kernel.org/lkml/1529412794-17720-4-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- include/linux/bitops.h | 22 +--------------------- include/linux/bits.h | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 21 deletions(-) create mode 100644 include/linux/bits.h diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 8fbe259b197c..d4b167fc9ecb 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -1,29 +1,9 @@ #ifndef _LINUX_BITOPS_H #define _LINUX_BITOPS_H #include +#include -#ifdef __KERNEL__ -#define BIT(nr) (1UL << (nr)) -#define BIT_ULL(nr) (1ULL << (nr)) -#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) -#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) -#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) -#define BITS_PER_BYTE 8 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) -#endif - -/* - * Create a contiguous bitmask starting at bit position @l and ending at - * position @h. For example - * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. - */ -#define GENMASK(h, l) \ - (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) - -#define GENMASK_ULL(h, l) \ - (((~0ULL) - (1ULL << (l)) + 1) & \ - (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/include/linux/bits.h b/include/linux/bits.h new file mode 100644 index 000000000000..2b7b532c1d51 --- /dev/null +++ b/include/linux/bits.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_BITS_H +#define __LINUX_BITS_H +#include + +#define BIT(nr) (1UL << (nr)) +#define BIT_ULL(nr) (1ULL << (nr)) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) +#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) +#define BITS_PER_BYTE 8 + +/* + * Create a contiguous bitmask starting at bit position @l and ending at + * position @h. For example + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. + */ +#define GENMASK(h, l) \ + (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +#define GENMASK_ULL(h, l) \ + (((~0ULL) - (1ULL << (l)) + 1) & \ + (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + +#endif /* __LINUX_BITS_H */ From 26d422c046c3f8620642ad9fde8aa36867b820c6 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Wed, 18 Jul 2018 08:03:14 +0800 Subject: [PATCH 15/77] x86/speculation: Remove SPECTRE_V2_IBRS in enum spectre_v2_mitigation commit d9f4426c73002957be5dd39936f44a09498f7560 upstream. SPECTRE_V2_IBRS in enum spectre_v2_mitigation is never used. Remove it. Signed-off-by: Jiang Biao Signed-off-by: Thomas Gleixner Cc: hpa@zytor.com Cc: dwmw2@amazon.co.uk Cc: konrad.wilk@oracle.com Cc: bp@suse.de Cc: zhong.weidong@zte.com.cn Link: https://lkml.kernel.org/r/1531872194-39207-1-git-send-email-jiang.biao2@zte.com.cn [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1b4132161c1f..d15c352db687 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -214,7 +214,6 @@ enum spectre_v2_mitigation { SPECTRE_V2_RETPOLINE_MINIMAL_AMD, SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS, SPECTRE_V2_IBRS_ENHANCED, }; From 1739ba8b00408396192ff476383e608ab5d33694 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 7 Aug 2018 10:17:27 -0700 Subject: [PATCH 16/77] x86/cpu: Sanitize FAM6_ATOM naming commit f2c4db1bd80720cd8cb2a5aa220d9bc9f374f04e upstream. Going primarily by: https://en.wikipedia.org/wiki/List_of_Intel_Atom_microprocessors with additional information gleaned from other related pages; notably: - Bonnell shrink was called Saltwell - Moorefield is the Merriefield refresh which makes it Airmont The general naming scheme is: FAM6_ATOM_UARCH_SOCTYPE for i in `git grep -l FAM6_ATOM` ; do sed -i -e 's/ATOM_PINEVIEW/ATOM_BONNELL/g' \ -e 's/ATOM_LINCROFT/ATOM_BONNELL_MID/' \ -e 's/ATOM_PENWELL/ATOM_SALTWELL_MID/g' \ -e 's/ATOM_CLOVERVIEW/ATOM_SALTWELL_TABLET/g' \ -e 's/ATOM_CEDARVIEW/ATOM_SALTWELL/g' \ -e 's/ATOM_SILVERMONT1/ATOM_SILVERMONT/g' \ -e 's/ATOM_SILVERMONT2/ATOM_SILVERMONT_X/g' \ -e 's/ATOM_MERRIFIELD/ATOM_SILVERMONT_MID/g' \ -e 's/ATOM_MOOREFIELD/ATOM_AIRMONT_MID/g' \ -e 's/ATOM_DENVERTON/ATOM_GOLDMONT_X/g' \ -e 's/ATOM_GEMINI_LAKE/ATOM_GOLDMONT_PLUS/g' ${i} done Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: dave.hansen@linux.intel.com Cc: len.brown@intel.com Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner [bwh: Backported to 4.9: - Drop changes to CPU IDs that weren't already included - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 18 ++++++++------ arch/x86/events/intel/cstate.c | 4 +-- arch/x86/events/msr.c | 4 +-- arch/x86/include/asm/intel-family.h | 30 +++++++++++++---------- arch/x86/kernel/cpu/common.c | 28 ++++++++++----------- arch/x86/kernel/tsc.c | 2 +- arch/x86/platform/atom/punit_atom_debug.c | 4 +-- drivers/acpi/acpi_lpss.c | 2 +- drivers/cpufreq/intel_pstate.c | 2 +- drivers/idle/intel_idle.c | 14 +++++------ drivers/mmc/host/sdhci-acpi.c | 2 +- drivers/pci/pci-mid.c | 4 +-- drivers/powercap/intel_rapl.c | 8 +++--- drivers/thermal/intel_soc_dts_thermal.c | 2 +- 14 files changed, 65 insertions(+), 59 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a30829052a00..cb8178a2783a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3750,11 +3750,11 @@ __init int intel_pmu_init(void) pr_cont("Nehalem events, "); break; - case INTEL_FAM6_ATOM_PINEVIEW: - case INTEL_FAM6_ATOM_LINCROFT: - case INTEL_FAM6_ATOM_PENWELL: - case INTEL_FAM6_ATOM_CLOVERVIEW: - case INTEL_FAM6_ATOM_CEDARVIEW: + case INTEL_FAM6_ATOM_BONNELL: + case INTEL_FAM6_ATOM_BONNELL_MID: + case INTEL_FAM6_ATOM_SALTWELL: + case INTEL_FAM6_ATOM_SALTWELL_MID: + case INTEL_FAM6_ATOM_SALTWELL_TABLET: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3766,9 +3766,11 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; - case INTEL_FAM6_ATOM_SILVERMONT1: - case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_SILVERMONT: + case INTEL_FAM6_ATOM_SILVERMONT_X: + case INTEL_FAM6_ATOM_SILVERMONT_MID: case INTEL_FAM6_ATOM_AIRMONT: + case INTEL_FAM6_ATOM_AIRMONT_MID: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -3785,7 +3787,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_DENVERTON: + case INTEL_FAM6_ATOM_GOLDMONT_X: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 47d526c700a1..72d09340c24d 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -531,8 +531,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index be0b1968d60a..68144a341903 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -61,8 +61,8 @@ static bool test_intel(int idx) case INTEL_FAM6_BROADWELL_GT3E: case INTEL_FAM6_BROADWELL_X: - case INTEL_FAM6_ATOM_SILVERMONT1: - case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_SILVERMONT: + case INTEL_FAM6_ATOM_SILVERMONT_X: case INTEL_FAM6_ATOM_AIRMONT: if (idx == PERF_MSR_SMI) return true; diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 75b748a1deb8..ba7b6f736414 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -50,19 +50,23 @@ /* "Small Core" Processors (Atom) */ -#define INTEL_FAM6_ATOM_PINEVIEW 0x1C -#define INTEL_FAM6_ATOM_LINCROFT 0x26 -#define INTEL_FAM6_ATOM_PENWELL 0x27 -#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35 -#define INTEL_FAM6_ATOM_CEDARVIEW 0x36 -#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ -#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ -#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ -#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ -#define INTEL_FAM6_ATOM_GOLDMONT 0x5C -#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ -#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A +#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ +#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ + +#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ +#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ +#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ + +#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ +#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */ +#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ + +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ +#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ + +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ +#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ +#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ /* Xeon Phi */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8ddcf026a77d..cf3ee870f928 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -892,11 +892,11 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) } static const __initconst struct x86_cpu_id cpu_no_speculation[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY }, { X86_VENDOR_CENTAUR, 5 }, { X86_VENDOR_INTEL, 5 }, { X86_VENDOR_NSC, 5 }, @@ -911,10 +911,10 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { /* Only list CPUs which speculate but are non susceptible to SSB */ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, @@ -927,14 +927,14 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { /* in addition to cpu_no_speculation */ - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, {} diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 769c370011d6..cb768417429d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -713,7 +713,7 @@ unsigned long native_calibrate_tsc(void) case INTEL_FAM6_KABYLAKE_DESKTOP: crystal_khz = 24000; /* 24.0 MHz */ break; - case INTEL_FAM6_ATOM_DENVERTON: + case INTEL_FAM6_ATOM_GOLDMONT_X: crystal_khz = 25000; /* 25.0 MHz */ break; case INTEL_FAM6_ATOM_GOLDMONT: diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index d49d3be81953..ecb5866aaf84 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -154,8 +154,8 @@ static void punit_dbgfs_unregister(void) (kernel_ulong_t)&drv_data } static const struct x86_cpu_id intel_punit_cpu_ids[] = { - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt), - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, punit_device_tng), + ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt), + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng), ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), {} }; diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 957d3fa3b543..8e38249311bd 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -243,7 +243,7 @@ static const struct lpss_device_desc bsw_spi_dev_desc = { #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } static const struct x86_cpu_id lpss_cpu_ids[] = { - ICPU(INTEL_FAM6_ATOM_SILVERMONT1), /* Valleyview, Bay Trail */ + ICPU(INTEL_FAM6_ATOM_SILVERMONT), /* Valleyview, Bay Trail */ ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */ {} }; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f690085b1ad9..4fe999687415 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1413,7 +1413,7 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(INTEL_FAM6_SANDYBRIDGE, core_params), ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_params), - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_params), + ICPU(INTEL_FAM6_ATOM_SILVERMONT, silvermont_params), ICPU(INTEL_FAM6_IVYBRIDGE, core_params), ICPU(INTEL_FAM6_HASWELL_CORE, core_params), ICPU(INTEL_FAM6_BROADWELL_CORE, core_params), diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 5ded9b22b015..a6fa32c7e068 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1107,14 +1107,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), - ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), - ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), + ICPU(INTEL_FAM6_ATOM_BONNELL, idle_cpu_atom), + ICPU(INTEL_FAM6_ATOM_BONNELL_MID, idle_cpu_lincroft), ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), - ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier), + ICPU(INTEL_FAM6_ATOM_SALTWELL, idle_cpu_atom), + ICPU(INTEL_FAM6_ATOM_SILVERMONT, idle_cpu_byt), + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, idle_cpu_tangier), ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), @@ -1122,7 +1122,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), - ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), + ICPU(INTEL_FAM6_ATOM_SILVERMONT_X, idle_cpu_avn), ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), @@ -1134,7 +1134,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), - ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), + ICPU(INTEL_FAM6_ATOM_GOLDMONT_X, idle_cpu_dnv), {} }; diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 80918abfc468..4398398c0935 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -127,7 +127,7 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = { static bool sdhci_acpi_byt(void) { static const struct x86_cpu_id byt[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, {} }; diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c index c7f3408e3148..54b3f9bc5ad8 100644 --- a/drivers/pci/pci-mid.c +++ b/drivers/pci/pci-mid.c @@ -71,8 +71,8 @@ static struct pci_platform_pm_ops mid_pci_platform_pm = { * arch/x86/platform/intel-mid/pwr.c. */ static const struct x86_cpu_id lpss_cpu_ids[] = { - ICPU(INTEL_FAM6_ATOM_PENWELL), - ICPU(INTEL_FAM6_ATOM_MERRIFIELD), + ICPU(INTEL_FAM6_ATOM_SALTWELL_MID), + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID), {} }; diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 3c71f608b444..8809c1a20bed 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -1175,12 +1175,12 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { RAPL_CPU(INTEL_FAM6_KABYLAKE_MOBILE, rapl_defaults_core), RAPL_CPU(INTEL_FAM6_KABYLAKE_DESKTOP, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT1, rapl_defaults_byt), + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT, rapl_defaults_byt), RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT, rapl_defaults_cht), - RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD, rapl_defaults_tng), - RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD, rapl_defaults_ann), + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT_MID,rapl_defaults_tng), + RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT_MID, rapl_defaults_ann), RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT, rapl_defaults_core), - RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON, rapl_defaults_core), + RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT_X, rapl_defaults_core), RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL, rapl_defaults_hsw_server), {} diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c index b2bbaa1c60b0..18788109cae6 100644 --- a/drivers/thermal/intel_soc_dts_thermal.c +++ b/drivers/thermal/intel_soc_dts_thermal.c @@ -43,7 +43,7 @@ static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data) } static const struct x86_cpu_id soc_thermal_ids[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1, 0, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT, 0, BYT_SOC_DTS_APIC_IRQ}, {} }; From 29d4af1f21524173ac3266c45638e764d3c0d077 Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Wed, 15 Aug 2018 07:46:04 +0200 Subject: [PATCH 17/77] Documentation/l1tf: Fix small spelling typo commit 60ca05c3b44566b70d64fbb8e87a6e0c67725468 upstream. Fix small typo (wiil -> will) in the "3.4. Nested virtual machines" section. Fixes: 5b76a3cff011 ("KVM: VMX: Tell the nested hypervisor to skip L1D flush on vmentry") Cc: linux-kernel@vger.kernel.org Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Paolo Bonzini Cc: Greg Kroah-Hartman Cc: Tony Luck Cc: linux-doc@vger.kernel.org Cc: trivial@kernel.org Signed-off-by: Salvatore Bonaccorso Signed-off-by: Jonathan Corbet Signed-off-by: Thomas Gleixner [bwh: Backported to 4.9: adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/l1tf.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/l1tf.rst b/Documentation/l1tf.rst index bae52b845de0..b85dd80510b0 100644 --- a/Documentation/l1tf.rst +++ b/Documentation/l1tf.rst @@ -553,7 +553,7 @@ When nested virtualization is in use, three operating systems are involved: the bare metal hypervisor, the nested hypervisor and the nested virtual machine. VMENTER operations from the nested hypervisor into the nested guest will always be processed by the bare metal hypervisor. If KVM is the -bare metal hypervisor it wiil: +bare metal hypervisor it will: - Flush the L1D cache on every switch from the nested hypervisor to the nested virtual machine, so that the nested hypervisor's secrets are not From 822e5d5358bb945c5a22f7de50de307c8a782dbe Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:38:18 +0200 Subject: [PATCH 18/77] x86/speculation: Apply IBPB more strictly to avoid cross-process data leak commit dbfe2953f63c640463c630746cd5d9de8b2f63ae upstream. Currently, IBPB is only issued in cases when switching into a non-dumpable process, the rationale being to protect such 'important and security sensitive' processess (such as GPG) from data leaking into a different userspace process via spectre v2. This is however completely insufficient to provide proper userspace-to-userpace spectrev2 protection, as any process can poison branch buffers before being scheduled out, and the newly scheduled process immediately becomes spectrev2 victim. In order to minimize the performance impact (for usecases that do require spectrev2 protection), issue the barrier only in cases when switching between processess where the victim can't be ptraced by the potential attacker (as in such cases, the attacker doesn't have to bother with branch buffers at all). [ tglx: Split up PTRACE_MODE_NOACCESS_CHK into PTRACE_MODE_SCHED and PTRACE_MODE_IBPB to be able to do ptrace() context tracking reasonably fine-grained ] Fixes: 18bf3c3ea8 ("x86/speculation: Use Indirect Branch Prediction Barrier in context switch") Originally-by: Tim Chen Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: "SchauflerCasey" Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251437340.15880@cbobk.fhfr.pm Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/tlb.c | 31 ++++++++++++++++++++----------- include/linux/ptrace.h | 21 +++++++++++++++++++-- kernel/ptrace.c | 10 ++++++++++ 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index eac92e2d171b..ff8f8e529317 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -101,6 +102,19 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, local_irq_restore(flags); } +static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +{ + /* + * Check if the current (previous) task has access to the memory + * of the @tsk (next) task. If access is denied, make sure to + * issue a IBPB to stop user->user Spectre-v2 attacks. + * + * Note: __ptrace_may_access() returns 0 or -ERRNO. + */ + return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && + ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); +} + void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -115,18 +129,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * one process from doing Spectre-v2 attacks on another. * * As an optimization, flush indirect branches only when - * switching into processes that disable dumping. This - * protects high value processes like gpg, without having - * too high performance overhead. IBPB is *expensive*! - * - * This will not flush branches when switching into kernel - * threads. It will also not flush if we switch to idle - * thread and back to the same process. It will flush if we - * switch to a different non-dumpable process. + * switching into a processes that can't be ptrace by the + * current one (as in such case, attacker has much more + * convenient way how to tamper with the next process than + * branch buffer poisoning). */ - if (tsk && tsk->mm && - tsk->mm->context.ctx_id != last_ctx_id && - get_dumpable(tsk->mm) != SUID_DUMP_USER) + if (static_cpu_has(X86_FEATURE_USE_IBPB) && + ibpb_needed(tsk, last_ctx_id)) indirect_branch_prediction_barrier(); if (IS_ENABLED(CONFIG_VMAP_STACK)) { diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index d53a23100401..58ae371556bc 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -60,14 +60,17 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 #define PTRACE_MODE_ATTACH 0x02 #define PTRACE_MODE_NOAUDIT 0x04 -#define PTRACE_MODE_FSCREDS 0x08 -#define PTRACE_MODE_REALCREDS 0x10 +#define PTRACE_MODE_FSCREDS 0x08 +#define PTRACE_MODE_REALCREDS 0x10 +#define PTRACE_MODE_SCHED 0x20 +#define PTRACE_MODE_IBPB 0x40 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) +#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB) /** * ptrace_may_access - check whether the caller is permitted to access @@ -85,6 +88,20 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); +/** + * ptrace_may_access - check whether the caller is permitted to access + * a target task. + * @task: target task + * @mode: selects type of access and caller credentials + * + * Returns true on success, false on denial. + * + * Similar to ptrace_may_access(). Only to be called from context switch + * code. Does not call into audit and the regular LSM hooks due to locking + * constraints. + */ +extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode); + static inline int ptrace_reparented(struct task_struct *child) { return !same_thread_group(child->real_parent, child->parent); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index f39a7be98fc1..efba851ee018 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -258,6 +258,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { + if (mode & PTRACE_MODE_SCHED) + return false; + if (mode & PTRACE_MODE_NOAUDIT) return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); else @@ -325,9 +328,16 @@ ok: !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; + if (mode & PTRACE_MODE_SCHED) + return 0; return security_ptrace_access_check(task, mode); } +bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode) +{ + return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED); +} + bool ptrace_may_access(struct task_struct *task, unsigned int mode) { int err; From b410c57f4907dcf23a29f46f15b081fb404d7f4d Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:38:55 +0200 Subject: [PATCH 19/77] x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation commit 53c613fe6349994f023245519265999eed75957f upstream. STIBP is a feature provided by certain Intel ucodes / CPUs. This feature (once enabled) prevents cross-hyperthread control of decisions made by indirect branch predictors. Enable this feature if - the CPU is vulnerable to spectre v2 - the CPU supports SMT and has SMT siblings online - spectre_v2 mitigation autoselection is enabled (default) After some previous discussion, this leaves STIBP on all the time, as wrmsr on crossing kernel boundary is a no-no. This could perhaps later be a bit more optimized (like disabling it in NOHZ, experiment with disabling it in idle, etc) if needed. Note that the synchronization of the mask manipulation via newly added spec_ctrl_mutex is currently not strictly needed, as the only updater is already being serialized by cpu_add_remove_lock, but let's make this a little bit more future-proof. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251438240.15880@cbobk.fhfr.pm Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 57 ++++++++++++++++++++++++++++++++++---- kernel/cpu.c | 11 +++++++- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b6438dfba461..fc1c1cbc3627 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -33,12 +33,10 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); -/* - * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any - * writes to SPEC_CTRL contain whatever reserved bits have been set. - */ -u64 __ro_after_init x86_spec_ctrl_base; +/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); +static DEFINE_MUTEX(spec_ctrl_mutex); /* * The vendor and possibly platform specific bits which can be modified in @@ -323,6 +321,46 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + mask = x86_spec_ctrl_base; + if (cpu_smt_control == CPU_SMT_ENABLED) + mask |= SPEC_CTRL_STIBP; + else + mask &= ~SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + cpu_smt_control == CPU_SMT_ENABLED ? + "Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -422,6 +460,9 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } + + /* Enable STIBP if appropriate */ + arch_smt_update(); } #undef pr_fmt @@ -812,6 +853,8 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { + int ret; + if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -826,10 +869,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", spectre_v2_module_string()); + return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/kernel/cpu.c b/kernel/cpu.c index bf24e8400903..6edf43276556 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1986,6 +1986,12 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { }; + static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; @@ -2012,8 +2018,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) */ cpuhp_offline_cpu_device(cpu); } - if (!ret) + if (!ret) { cpu_smt_control = ctrlval; + arch_smt_update(); + } cpu_maps_update_done(); return ret; } @@ -2024,6 +2032,7 @@ static int cpuhp_smt_enable(void) cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; + arch_smt_update(); for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) From 787b367ecab5e9e722ddd257bf21a90c370eab95 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 25 Sep 2018 14:39:28 +0200 Subject: [PATCH 20/77] x86/speculation: Propagate information about RSB filling mitigation to sysfs commit bb4b3b7762735cdaba5a40fd94c9303d9ffa147a upstream. If spectrev2 mitigation has been enabled, RSB is filled on context switch in order to protect from various classes of spectrev2 attacks. If this mitigation is enabled, say so in sysfs for spectrev2. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: "WoodhouseDavid" Cc: Andi Kleen Cc: Tim Chen Cc: "SchauflerCasey" Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251438580.15880@cbobk.fhfr.pm Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fc1c1cbc3627..03ebc0adcd82 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -869,10 +869,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); return ret; From c36925835c8f8e0c9d237fd67b54878e0d3476a9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 13 Nov 2018 19:49:10 +0100 Subject: [PATCH 21/77] x86/speculation/l1tf: Drop the swap storage limit restriction when l1tf=off commit 5b5e4d623ec8a34689df98e42d038a3b594d2ff9 upstream. Swap storage is restricted to max_swapfile_size (~16TB on x86_64) whenever the system is deemed affected by L1TF vulnerability. Even though the limit is quite high for most deployments it seems to be too restrictive for deployments which are willing to live with the mitigation disabled. We have a customer to deploy 8x 6,4TB PCIe/NVMe SSD swap devices which is clearly out of the limit. Drop the swap restriction when l1tf=off is specified. It also doesn't make much sense to warn about too much memory for the l1tf mitigation when it is forcefully disabled by the administrator. [ tglx: Folded the documentation delta change ] Fixes: 377eeaa8e11f ("x86/speculation/l1tf: Limit swap file size to MAX_PA/2") Signed-off-by: Michal Hocko Signed-off-by: Thomas Gleixner Reviewed-by: Pavel Tatashin Reviewed-by: Andi Kleen Acked-by: Jiri Kosina Cc: Linus Torvalds Cc: Dave Hansen Cc: Andi Kleen Cc: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20181113184910.26697-1-mhocko@kernel.org [bwh: Backported to 4.9: adjust filenames, context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 3 +++ Documentation/l1tf.rst | 6 +++++- arch/x86/kernel/cpu/bugs.c | 3 ++- arch/x86/mm/init.c | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a1472b48ee22..18cfc4998481 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2076,6 +2076,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. off Disables hypervisor mitigations and doesn't emit any warnings. + It also drops the swap size and available + RAM limit restriction on both hypervisor and + bare metal. Default is 'flush'. diff --git a/Documentation/l1tf.rst b/Documentation/l1tf.rst index b85dd80510b0..9af977384168 100644 --- a/Documentation/l1tf.rst +++ b/Documentation/l1tf.rst @@ -405,6 +405,9 @@ time with the option "l1tf=". The valid arguments for this option are: off Disables hypervisor mitigations and doesn't emit any warnings. + It also drops the swap size and available RAM limit restrictions + on both hypervisor and bare metal. + ============ ============================================================= The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`. @@ -576,7 +579,8 @@ Default mitigations The kernel default mitigations for vulnerable processors are: - PTE inversion to protect against malicious user space. This is done - unconditionally and cannot be controlled. + unconditionally and cannot be controlled. The swap storage is limited + to ~16TB. - L1D conditional flushing on VMENTER when EPT is enabled for a guest. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03ebc0adcd82..803234b1845f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -775,7 +775,8 @@ static void __init l1tf_select_mitigation(void) #endif half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; - if (e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) { + if (l1tf_mitigation != L1TF_MITIGATION_OFF && + e820_any_mapped(half_pa, ULLONG_MAX - half_pa, E820_RAM)) { pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", half_pa); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 90801a8f19c9..ce092a62fc5d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -790,7 +790,7 @@ unsigned long max_swapfile_size(void) pages = generic_max_swapfile_size(); - if (boot_cpu_has_bug(X86_BUG_L1TF)) { + if (boot_cpu_has_bug(X86_BUG_L1TF) && l1tf_mitigation != L1TF_MITIGATION_OFF) { /* Limit the swap file size to MAX_PA/2 for L1TF workaround */ unsigned long long l1tf_limit = l1tf_pfn_limit(); /* From e8891b7227dffa563daf514fca3e22f5264b9776 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:29 +0100 Subject: [PATCH 22/77] x86/speculation: Update the TIF_SSBD comment commit 8eb729b77faf83ac4c1f363a9ad68d042415f24c upstream. "Reduced Data Speculation" is an obsolete term. The correct new name is "Speculative store bypass disable" - which is abbreviated into SSBD. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185003.593893901@linutronix.de [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2d8788a59b4d..28f3191415be 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,7 +83,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_SSBD 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Speculative store bypass disable */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ From 61549811fcbdfd33e90b86703f97e03af9f6fbdb Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:30 +0100 Subject: [PATCH 23/77] x86/speculation: Clean up spectre_v2_parse_cmdline() commit 24848509aa55eac39d524b587b051f4e86df3c12 upstream. Remove the unnecessary 'else' statement in spectre_v2_parse_cmdline() to save an indentation level. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185003.688010903@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 803234b1845f..62da71f523b6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -281,22 +281,21 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; - else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); - if (ret < 0) - return SPECTRE_V2_CMD_AUTO; - for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { - if (!match_option(arg, ret, mitigation_options[i].option)) - continue; - cmd = mitigation_options[i].cmd; - break; - } + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; - if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", arg); - return SPECTRE_V2_CMD_AUTO; - } + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_CMD_AUTO; } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || From 66c0d89b81a051ac4df051fcf770b5eb7f208200 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:31 +0100 Subject: [PATCH 24/77] x86/speculation: Remove unnecessary ret variable in cpu_show_common() commit b86bda0426853bfe8a3506c7d2a5b332760ae46b upstream. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185003.783903657@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 62da71f523b6..7b47cdf6414e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -853,8 +853,6 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { - int ret; - if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -869,13 +867,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); - return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); From 20ba13aef2628548b515b4dc31ca5a5f2baa9bbd Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:32 +0100 Subject: [PATCH 25/77] x86/speculation: Move STIPB/IBPB string conditionals out of cpu_show_common() commit a8f76ae41cd633ac00be1b3019b1eb4741be3828 upstream. The Spectre V2 printout in cpu_show_common() handles conditionals for the various mitigation methods directly in the sprintf() argument list. That's hard to read and will become unreadable if more complex decisions need to be made for a particular method. Move the conditionals for STIBP and IBPB string selection into helper functions, so they can be extended later on. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185003.874479208@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7b47cdf6414e..64e5f64c3b38 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -850,6 +850,22 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static char *stibp_state(void) +{ + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) + return ", STIBP"; + else + return ""; +} + +static char *ibpb_state(void) +{ + if (boot_cpu_has(X86_FEATURE_USE_IBPB)) + return ", IBPB"; + else + return ""; +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -868,9 +884,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SPECTRE_V2: return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); From 8a7723de5e1a57c394d885e731af6ebba990f110 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:33 +0100 Subject: [PATCH 26/77] x86/speculation: Disable STIBP when enhanced IBRS is in use commit 34bce7c9690b1d897686aac89604ba7adc365556 upstream. If enhanced IBRS is active, STIBP is redundant for mitigating Spectre v2 user space exploits from hyperthread sibling. Disable STIBP when enhanced IBRS is used. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185003.966801480@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 64e5f64c3b38..364fd880feef 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -325,6 +325,10 @@ static bool stibp_needed(void) if (spectre_v2_enabled == SPECTRE_V2_NONE) return false; + /* Enhanced IBRS makes using STIBP unnecessary. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + if (!boot_cpu_has(X86_FEATURE_STIBP)) return false; @@ -852,6 +856,9 @@ static ssize_t l1tf_show_state(char *buf) static char *stibp_state(void) { + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return ""; + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) return ", STIBP"; else From fd8d77ee819fa2a56a26c54b894d664ec677bb6d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:34 +0100 Subject: [PATCH 27/77] x86/speculation: Rename SSBD update functions commit 26c4d75b234040c11728a8acb796b3a85ba7507c upstream. During context switch, the SSBD bit in SPEC_CTRL MSR is updated according to changes of the TIF_SSBD flag in the current and next running task. Currently, only the bit controlling speculative store bypass disable in SPEC_CTRL MSR is updated and the related update functions all have "speculative_store" or "ssb" in their names. For enhanced mitigation control other bits in SPEC_CTRL MSR need to be updated as well, which makes the SSB names inadequate. Rename the "speculative_store*" functions to a more generic name. No functional change. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.058866968@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 +++--- arch/x86/kernel/cpu/bugs.c | 4 ++-- arch/x86/kernel/process.c | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index ae7c2c5cd7f0..8e2f8411c7a7 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -70,11 +70,11 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(unsigned long tif); +extern void speculation_ctrl_update(unsigned long tif); -static inline void speculative_store_bypass_update_current(void) +static inline void speculation_ctrl_update_current(void) { - speculative_store_bypass_update(current_thread_info()->flags); + speculation_ctrl_update(current_thread_info()->flags); } #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 364fd880feef..8c108343f7ae 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -200,7 +200,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : ssbd_spec_ctrl_to_tif(hostval); - speculative_store_bypass_update(tif); + speculation_ctrl_update(tif); } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); @@ -637,7 +637,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) * mitigation until it is next scheduled. */ if (task == current && update) - speculative_store_bypass_update_current(); + speculation_ctrl_update_current(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 00a9047539d7..36ab44270a82 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -321,27 +321,27 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void intel_set_ssb_state(unsigned long tifn) +static __always_inline void spec_ctrl_update_msr(unsigned long tifn) { u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +static __always_inline void __speculation_ctrl_update(unsigned long tifn) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) amd_set_ssb_virt_state(tifn); else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) amd_set_core_ssb_state(tifn); else - intel_set_ssb_state(tifn); + spec_ctrl_update_msr(tifn); } -void speculative_store_bypass_update(unsigned long tif) +void speculation_ctrl_update(unsigned long tif) { preempt_disable(); - __speculative_store_bypass_update(tif); + __speculation_ctrl_update(tif); preempt_enable(); } @@ -375,7 +375,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, cr4_toggle_bits(X86_CR4_TSD); if ((tifp ^ tifn) & _TIF_SSBD) - __speculative_store_bypass_update(tifn); + __speculation_ctrl_update(tifn); } /* From dbbc533a9b4a82c18aba36129bb1513ac90f4bc6 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:35 +0100 Subject: [PATCH 28/77] x86/speculation: Reorganize speculation control MSRs update commit 01daf56875ee0cd50ed496a09b20eb369b45dfa5 upstream. The logic to detect whether there's a change in the previous and next task's flag relevant to update speculation control MSRs is spread out across multiple functions. Consolidate all checks needed for updating speculation control MSRs into the new __speculation_ctrl_update() helper function. This makes it easy to pick the right speculation control MSR and the bits in MSR_IA32_SPEC_CTRL that need updating based on TIF flags changes. Originally-by: Thomas Lendacky Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.151077005@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process.c | 42 +++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 36ab44270a82..8ab00560e27d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -321,27 +321,40 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void spec_ctrl_update_msr(unsigned long tifn) +/* + * Update the MSRs managing speculation control, during context switch. + * + * tifp: Previous task's thread flags + * tifn: Next task's thread flags + */ +static __always_inline void __speculation_ctrl_update(unsigned long tifp, + unsigned long tifn) { - u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + u64 msr = x86_spec_ctrl_base; + bool updmsr = false; - wrmsrl(MSR_IA32_SPEC_CTRL, msr); -} + /* If TIF_SSBD is different, select the proper mitigation method */ + if ((tifp ^ tifn) & _TIF_SSBD) { + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + amd_set_ssb_virt_state(tifn); + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + amd_set_core_ssb_state(tifn); + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr |= ssbd_tif_to_spec_ctrl(tifn); + updmsr = true; + } + } -static __always_inline void __speculation_ctrl_update(unsigned long tifn) -{ - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) - amd_set_ssb_virt_state(tifn); - else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - amd_set_core_ssb_state(tifn); - else - spec_ctrl_update_msr(tifn); + if (updmsr) + wrmsrl(MSR_IA32_SPEC_CTRL, msr); } void speculation_ctrl_update(unsigned long tif) { + /* Forced update. Make sure all relevant TIF flags are different */ preempt_disable(); - __speculation_ctrl_update(tif); + __speculation_ctrl_update(~tif, tif); preempt_enable(); } @@ -374,8 +387,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOTSC) cr4_toggle_bits(X86_CR4_TSD); - if ((tifp ^ tifn) & _TIF_SSBD) - __speculation_ctrl_update(tifn); + __speculation_ctrl_update(tifp, tifn); } /* From 4cc154901e47d56e7491d37f7a81768ddb96e733 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:37 +0100 Subject: [PATCH 29/77] x86/Kconfig: Select SCHED_SMT if SMP enabled commit dbe733642e01dd108f71436aaea7b328cb28fd87 upstream. CONFIG_SCHED_SMT is enabled by all distros, so there is not a real point to have it configurable. The runtime overhead in the core scheduler code is minimal because the actual SMT scheduling parts are conditional on a static key. This allows to expose the scheduler's SMT state static key to the speculation control code. Alternatively the scheduler's static key could be made always available when CONFIG_SMP is enabled, but that's just adding an unused static key to every other architecture for nothing. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.337452245@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5a4591ff8407..e0055b4302d6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -937,13 +937,7 @@ config NR_CPUS approximately eight kilobytes to the kernel image. config SCHED_SMT - bool "SMT (Hyperthreading) scheduler support" - depends on SMP - ---help--- - SMT scheduler support improves the CPU scheduler's decision making - when dealing with Intel Pentium 4 chips with HyperThreading at a - cost of slightly increased overhead in some places. If unsure say - N here. + def_bool y if SMP config SCHED_MC def_bool y From c803409910a6e2ca70c3edd557aeda0055827f7a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 10 May 2019 00:46:25 +0100 Subject: [PATCH 30/77] sched: Add sched_smt_active() Add the sched_smt_active() function needed for some x86 speculation mitigations. This was introduced upstream by commits 1b568f0aabf2 "sched/core: Optimize SCHED_SMT", ba2591a5993e "sched/smt: Update sched_smt_present at runtime", c5511d03ec09 "sched/smt: Make sched_smt_present track topology", and 321a874a7ef8 "sched/smt: Expose sched_smt_present static key". The upstream implementation uses the static_key_{disable,enable}_cpuslocked() functions, which aren't practical to backport. Signed-off-by: Ben Hutchings Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra (Intel) Cc: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- include/linux/sched/smt.h | 18 ++++++++++++++++++ kernel/sched/core.c | 19 +++++++++++++++++++ kernel/sched/sched.h | 1 + 3 files changed, 38 insertions(+) create mode 100644 include/linux/sched/smt.h diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h new file mode 100644 index 000000000000..5209c268c6fd --- /dev/null +++ b/include/linux/sched/smt.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_SMT_H +#define _LINUX_SCHED_SMT_H + +#include + +#ifdef CONFIG_SCHED_SMT +extern atomic_t sched_smt_present; + +static __always_inline bool sched_smt_active(void) +{ + return atomic_read(&sched_smt_present); +} +#else +static inline bool sched_smt_active(void) { return false; } +#endif + +#endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6b3fff6a6437..50e80b1be2c8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7355,11 +7355,22 @@ static int cpuset_cpu_inactive(unsigned int cpu) return 0; } +#ifdef CONFIG_SCHED_SMT +atomic_t sched_smt_present = ATOMIC_INIT(0); +#endif + int sched_cpu_activate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); unsigned long flags; +#ifdef CONFIG_SCHED_SMT + /* + * When going up, increment the number of cores with SMT present. + */ + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + atomic_inc(&sched_smt_present); +#endif set_cpu_active(cpu, true); if (sched_smp_initialized) { @@ -7408,6 +7419,14 @@ int sched_cpu_deactivate(unsigned int cpu) else synchronize_rcu(); +#ifdef CONFIG_SCHED_SMT + /* + * When going down, decrement the number of cores with SMT present. + */ + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + atomic_dec(&sched_smt_present); +#endif + if (!sched_smp_initialized) return 0; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ec6e838e991a..15c08752926b 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include From a3c901bfdb2e37f281cc8087d5a01bb35da64b20 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:39 +0100 Subject: [PATCH 31/77] x86/speculation: Rework SMT state change commit a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f upstream. arch_smt_update() is only called when the sysfs SMT control knob is changed. This means that when SMT is enabled in the sysfs control knob the system is considered to have SMT active even if all siblings are offline. To allow finegrained control of the speculation mitigations, the actual SMT state is more interesting than the fact that siblings could be enabled. Rework the code, so arch_smt_update() is invoked from each individual CPU hotplug function, and simplify the update function while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 11 +++++------ include/linux/sched/smt.h | 2 ++ kernel/cpu.c | 15 +++++++++------ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8c108343f7ae..6ae729caf75e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -348,16 +349,14 @@ void arch_smt_update(void) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base; - if (cpu_smt_control == CPU_SMT_ENABLED) + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) mask |= SPEC_CTRL_STIBP; - else - mask &= ~SPEC_CTRL_STIBP; if (mask != x86_spec_ctrl_base) { pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - cpu_smt_control == CPU_SMT_ENABLED ? - "Enabling" : "Disabling"); + mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); x86_spec_ctrl_base = mask; on_each_cpu(update_stibp_msr, NULL, 1); } diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index 5209c268c6fd..559ac4590593 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -15,4 +15,6 @@ static __always_inline bool sched_smt_active(void) static inline bool sched_smt_active(void) { return false; } #endif +void arch_smt_update(void); + #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 6edf43276556..a540ca031288 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -356,6 +357,12 @@ void cpu_hotplug_enable(void) EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #endif /* CONFIG_HOTPLUG_CPU */ +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { } + #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; EXPORT_SYMBOL_GPL(cpu_smt_control); @@ -1058,6 +1065,7 @@ out: /* This post dead nonsense must die */ if (!ret && hasdied) cpu_notify_nofail(CPU_POST_DEAD, cpu); + arch_smt_update(); return ret; } @@ -1177,6 +1185,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) ret = cpuhp_up_callbacks(cpu, st, target); out: cpu_hotplug_done(); + arch_smt_update(); return ret; } @@ -1986,12 +1995,6 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } -/* - * Architectures that need SMT-specific errata handling during SMT hotplug - * should override this. - */ -void __weak arch_smt_update(void) { }; - static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; From 9d6f23fae003031ba7aa2696075ad7e70310bd84 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:40 +0100 Subject: [PATCH 32/77] x86/l1tf: Show actual SMT state commit 130d6f946f6f2a972ee3ec8540b7243ab99abe97 upstream. Use the now exposed real SMT state, not the SMT sysfs control knob state. This reflects the state of the system when the mitigation status is queried. This does not change the warning in the VMX launch code. There the dependency on the control knob makes sense because siblings could be brought online anytime after launching the VM. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.613357354@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6ae729caf75e..2ae8cdc8b68c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -838,13 +838,14 @@ static ssize_t l1tf_show_state(char *buf) if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && - cpu_smt_control == CPU_SMT_ENABLED)) + sched_smt_active())) { return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation]); + } return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation], - cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); + sched_smt_active() ? "vulnerable" : "disabled"); } #else static ssize_t l1tf_show_state(char *buf) From 5fdb12373d68cd4a31fb33e1ccffe84c5b35f077 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:41 +0100 Subject: [PATCH 33/77] x86/speculation: Reorder the spec_v2 code commit 15d6b7aab0793b2de8a05d8a828777dd24db424e upstream. Reorder the code so it is better grouped. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.707122879@linutronix.de [bwh: Backported to 4.9: - We still have the minimal mitigation modes - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 176 ++++++++++++++++++------------------- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2ae8cdc8b68c..5b8551aacb18 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -122,31 +122,6 @@ void __init check_bugs(void) #endif } -/* The kernel command line selection */ -enum spectre_v2_mitigation_cmd { - SPECTRE_V2_CMD_NONE, - SPECTRE_V2_CMD_AUTO, - SPECTRE_V2_CMD_FORCE, - SPECTRE_V2_CMD_RETPOLINE, - SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, -}; - -static const char *spectre_v2_strings[] = { - [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", - [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", -}; - -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -216,6 +191,12 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -237,6 +218,45 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +static inline bool match_option(const char *arg, int arglen, const char *opt) +{ + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); +} + +/* The kernel command line selection for spectre v2 */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", +}; + +static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +} mitigation_options[] = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, +}; + static void __init spec2_print_if_insecure(const char *reason) { if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) @@ -254,31 +274,11 @@ static inline bool retp_compiler(void) return __is_defined(RETPOLINE); } -static inline bool match_option(const char *arg, int arglen, const char *opt) -{ - int len = strlen(opt); - - return len == arglen && !strncmp(arg, opt, len); -} - -static const struct { - const char *option; - enum spectre_v2_mitigation_cmd cmd; - bool secure; -} mitigation_options[] = { - { "off", SPECTRE_V2_CMD_NONE, false }, - { "on", SPECTRE_V2_CMD_FORCE, true }, - { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, - { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, - { "auto", SPECTRE_V2_CMD_AUTO, false }, -}; - static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; char arg[20]; int ret, i; - enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; @@ -321,48 +321,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } -static bool stibp_needed(void) -{ - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; -} - -static void update_stibp_msr(void *info) -{ - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -} - -void arch_smt_update(void) -{ - u64 mask; - - if (!stibp_needed()) - return; - - mutex_lock(&spec_ctrl_mutex); - - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); - } - mutex_unlock(&spec_ctrl_mutex); -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -467,6 +425,48 @@ specv2_set_mode: arch_smt_update(); } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + /* Enhanced IBRS makes using STIBP unnecessary. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt From 8d33157c63a01bcdca9ca003f4fa238565a367a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:42 +0100 Subject: [PATCH 34/77] x86/speculation: Mark string arrays const correctly commit 8770709f411763884535662744a3786a1806afd3 upstream. checkpatch.pl muttered when reshuffling the code: WARNING: static const char * array should probably be static const char * const Fix up all the string arrays. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.800018931@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5b8551aacb18..f5bb620f7882 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -235,7 +235,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; -static const char *spectre_v2_strings[] = { +static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", @@ -481,7 +481,7 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_SECCOMP, }; -static const char *ssb_strings[] = { +static const char * const ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", @@ -822,7 +822,7 @@ early_param("l1tf", l1tf_cmdline); #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" #if IS_ENABLED(CONFIG_KVM_INTEL) -static const char *l1tf_vmx_states[] = { +static const char * const l1tf_vmx_states[] = { [VMENTER_L1D_FLUSH_AUTO] = "auto", [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", From d0737990d2e7ab8c73dad92251207149bdd556bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:43 +0100 Subject: [PATCH 35/77] x86/speculataion: Mark command line parser data __initdata commit 30ba72a990f5096ae08f284de17986461efcc408 upstream. No point to keep that around. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.893886356@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f5bb620f7882..62f1b2e2104e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -248,7 +248,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] = { +} mitigation_options[] __initdata = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -491,7 +491,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] = { +} ssb_mitigation_options[] __initdata = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ From d343a9412cc86aff1a8cbaa90d7b048dc785d0e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:44 +0100 Subject: [PATCH 36/77] x86/speculation: Unify conditional spectre v2 print functions commit 495d470e9828500e0155027f230449ac5e29c025 upstream. There is no point in having two functions and a conditional at the call site. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185004.986890749@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 62f1b2e2104e..cebaaa9f9424 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -257,15 +257,9 @@ static const struct { { "auto", SPECTRE_V2_CMD_AUTO, false }, }; -static void __init spec2_print_if_insecure(const char *reason) +static void __init spec_v2_print_cond(const char *reason, bool secure) { - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - -static void __init spec2_print_if_secure(const char *reason) -{ - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) pr_info("%s selected on command line.\n", reason); } @@ -313,11 +307,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (mitigation_options[i].secure) - spec2_print_if_secure(mitigation_options[i].option); - else - spec2_print_if_insecure(mitigation_options[i].option); - + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); return cmd; } From dda365c4d0e911c6c63e580c284969069db3c63d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:45 +0100 Subject: [PATCH 37/77] x86/speculation: Add command line control for indirect branch speculation commit fa1202ef224391b6f5b26cdd44cc50495e8fab54 upstream. Add command line control for user space indirect branch speculation mitigations. The new option is: spectre_v2_user= The initial options are: - on: Unconditionally enabled - off: Unconditionally disabled -auto: Kernel selects mitigation (default off for now) When the spectre_v2= command line argument is either 'on' or 'off' this implies that the application to application control follows that state even if a contradicting spectre_v2_user= argument is supplied. Originally-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185005.082720373@linutronix.de [bwh: Backported to 4.9: adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 32 ++++++- arch/x86/include/asm/nospec-branch.h | 10 ++ arch/x86/kernel/cpu/bugs.c | 133 +++++++++++++++++++++++---- 3 files changed, 156 insertions(+), 19 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 18cfc4998481..cef2cb9cbf8a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -4033,9 +4033,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. + The default operation protects the kernel from + user space attacks. - on - unconditionally enable - off - unconditionally disable + on - unconditionally enable, implies + spectre_v2_user=on + off - unconditionally disable, implies + spectre_v2_user=off auto - kernel detects whether your CPU model is vulnerable @@ -4045,6 +4049,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. CONFIG_RETPOLINE configuration option, and the compiler with which the kernel was built. + Selecting 'on' will also enable the mitigation + against user space to user space task attacks. + + Selecting 'off' will disable both the kernel and + the user space protections. + Specific mitigations can also be selected manually: retpoline - replace indirect branches @@ -4054,6 +4064,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Not specifying this option is equivalent to spectre_v2=auto. + spectre_v2_user= + [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability between + user space tasks + + on - Unconditionally enable mitigations. Is + enforced by spectre_v2=on + + off - Unconditionally disable mitigations. Is + enforced by spectre_v2=off + + auto - Kernel selects the mitigation depending on + the available CPU features and vulnerability. + Default is off. + + Not specifying this option is equivalent to + spectre_v2_user=auto. + spec_store_bypass_disable= [HW] Control Speculative Store Bypass (SSB) Disable mitigation (Speculative Store Bypass vulnerability) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d15c352db687..dab01da02de4 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -3,6 +3,8 @@ #ifndef _ASM_X86_NOSPEC_BRANCH_H_ #define _ASM_X86_NOSPEC_BRANCH_H_ +#include + #include #include #include @@ -217,6 +219,12 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS_ENHANCED, }; +/* The indirect branch speculation control variants */ +enum spectre_v2_user_mitigation { + SPECTRE_V2_USER_NONE, + SPECTRE_V2_USER_STRICT, +}; + /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, @@ -294,6 +302,8 @@ do { \ preempt_enable(); \ } while (0) +DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cebaaa9f9424..8e414c416808 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -52,6 +52,9 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; u64 __ro_after_init x86_amd_ls_cfg_base; u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; +/* Control conditional STIPB in switch_to() */ +DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); + void __init check_bugs(void) { identify_boot_cpu(); @@ -197,6 +200,9 @@ static void x86_amd_ssb_disable(void) static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = + SPECTRE_V2_USER_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -235,6 +241,104 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; +enum spectre_v2_user_cmd { + SPECTRE_V2_USER_CMD_NONE, + SPECTRE_V2_USER_CMD_AUTO, + SPECTRE_V2_USER_CMD_FORCE, +}; + +static const char * const spectre_v2_user_strings[] = { + [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", + [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", +}; + +static const struct { + const char *option; + enum spectre_v2_user_cmd cmd; + bool secure; +} v2_user_options[] __initdata = { + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, +}; + +static void __init spec_v2_user_print_cond(const char *reason, bool secure) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) + pr_info("spectre_v2_user=%s forced on command line.\n", reason); +} + +static enum spectre_v2_user_cmd __init +spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +{ + char arg[20]; + int ret, i; + + switch (v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: + return SPECTRE_V2_USER_CMD_FORCE; + default: + break; + } + + ret = cmdline_find_option(boot_command_line, "spectre_v2_user", + arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_USER_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) { + if (match_option(arg, ret, v2_user_options[i].option)) { + spec_v2_user_print_cond(v2_user_options[i].option, + v2_user_options[i].secure); + return v2_user_options[i].cmd; + } + } + + pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_USER_CMD_AUTO; +} + +static void __init +spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +{ + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); + + if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) + return; + + if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + + switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; + case SPECTRE_V2_USER_CMD_FORCE: + mode = SPECTRE_V2_USER_STRICT; + break; + } + + /* Initialize Indirect Branch Prediction Barrier */ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + } + + /* If enhanced IBRS is enabled no STIPB required */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return; + +set_mode: + spectre_v2_user = mode; + /* Only print the STIBP mode when SMT possible */ + if (smt_possible) + pr_info("%s\n", spectre_v2_user_strings[mode]); +} + static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", @@ -390,12 +494,6 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_IBPB)) { - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); - } - /* * Retpoline means the kernel is safe because it has no indirect * branches. Enhanced IBRS protects firmware too, so, enable restricted @@ -412,23 +510,21 @@ specv2_set_mode: pr_info("Enabling Restricted Speculation for firmware calls\n"); } + /* Set up IBPB and STIBP depending on the general spectre V2 command */ + spectre_v2_user_select_mitigation(cmd); + /* Enable STIBP if appropriate */ arch_smt_update(); } static bool stibp_needed(void) { - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - /* Enhanced IBRS makes using STIBP unnecessary. */ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return false; - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; + /* Check for strict user mitigation mode */ + return spectre_v2_user == SPECTRE_V2_USER_STRICT; } static void update_stibp_msr(void *info) @@ -850,10 +946,13 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) - return ", STIBP"; - else - return ""; + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", STIBP: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", STIBP: forced"; + } + return ""; } static char *ibpb_state(void) From a35a8c64221afba50d76571d96fc4563c64db81e Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:46 +0100 Subject: [PATCH 38/77] x86/speculation: Prepare for per task indirect branch speculation control commit 5bfbe3ad5840d941b89bcac54b821ba14f50a0ba upstream. To avoid the overhead of STIBP always on, it's necessary to allow per task control of STIBP. Add a new task flag TIF_SPEC_IB and evaluate it during context switch if SMT is active and flag evaluation is enabled by the speculation control code. Add the conditional evaluation to x86_virt_spec_ctrl() as well so the guest/host switch works properly. This has no effect because TIF_SPEC_IB cannot be set yet and the static key which controls evaluation is off. Preparatory patch for adding the control code. [ tglx: Simplify the context switch logic and make the TIF evaluation depend on SMP=y and on the static key controlling the conditional update. Rename it to TIF_SPEC_IB because it controls both STIBP and IBPB ] Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185005.176917199@linutronix.de [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 5 +++-- arch/x86/include/asm/spec-ctrl.h | 12 ++++++++++++ arch/x86/include/asm/thread_info.h | 5 ++++- arch/x86/kernel/cpu/bugs.c | 4 ++++ arch/x86/kernel/process.c | 20 ++++++++++++++++++-- 5 files changed, 41 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9963e21ac443..d7813b118ac8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -39,9 +39,10 @@ /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ -#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 8e2f8411c7a7..27b0bce3933b 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 28f3191415be..389b95bb1456 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -87,6 +87,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -111,6 +112,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -141,7 +143,8 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) + (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP| \ + _TIF_SSBD|_TIF_SPEC_IB) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8e414c416808..c52d5596d943 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -146,6 +146,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + /* Conditional STIBP enabled? */ + if (static_branch_unlikely(&switch_to_cond_stibp)) + hostval |= stibp_tif_to_spec_ctrl(ti->flags); + if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8ab00560e27d..5111e107a902 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -330,11 +330,17 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) static __always_inline void __speculation_ctrl_update(unsigned long tifp, unsigned long tifn) { + unsigned long tif_diff = tifp ^ tifn; u64 msr = x86_spec_ctrl_base; bool updmsr = false; - /* If TIF_SSBD is different, select the proper mitigation method */ - if ((tifp ^ tifn) & _TIF_SSBD) { + /* + * If TIF_SSBD is different, select the proper mitigation + * method. Note that if SSBD mitigation is disabled or permanentely + * enabled this branch can't be taken because nothing can set + * TIF_SSBD. + */ + if (tif_diff & _TIF_SSBD) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { amd_set_ssb_virt_state(tifn); } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { @@ -346,6 +352,16 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } } + /* + * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, + * otherwise avoid the MSR write. + */ + if (IS_ENABLED(CONFIG_SMP) && + static_branch_unlikely(&switch_to_cond_stibp)) { + updmsr |= !!(tif_diff & _TIF_SPEC_IB); + msr |= stibp_tif_to_spec_ctrl(tifn); + } + if (updmsr) wrmsrl(MSR_IA32_SPEC_CTRL, msr); } From b5741ef7591dad04afd67b3ea14265847033a652 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:47 +0100 Subject: [PATCH 39/77] x86/process: Consolidate and simplify switch_to_xtra() code commit ff16701a29cba3aafa0bd1656d766813b2d0a811 upstream. Move the conditional invocation of __switch_to_xtra() into an inline function so the logic can be shared between 32 and 64 bit. Remove the handthrough of the TSS pointer and retrieve the pointer directly in the bitmap handling function. Use this_cpu_ptr() instead of the per_cpu() indirection. This is a preparatory change so integration of conditional indirect branch speculation optimization happens only in one place. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185005.280855518@linutronix.de [bwh: Backported to 4.9: - Use cpu_tss instead of cpu_tss_rw - __switch_to() still uses the tss variable, so don't delete it - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/switch_to.h | 3 --- arch/x86/kernel/process.c | 12 +++++++----- arch/x86/kernel/process.h | 24 ++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 9 +++------ arch/x86/kernel/process_64.c | 9 +++------ 5 files changed, 37 insertions(+), 20 deletions(-) create mode 100644 arch/x86/kernel/process.h diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 5cb436acd463..676e84f521ba 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -8,9 +8,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -struct tss_struct; -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ static inline void prepare_switch_to(struct task_struct *prev, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5111e107a902..b1abe87c5f4d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -35,6 +35,8 @@ #include #include +#include "process.h" + /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. The TSS size is kept cacheline-aligned @@ -183,11 +185,12 @@ int set_tsc_mode(unsigned int val) return 0; } -static inline void switch_to_bitmap(struct tss_struct *tss, - struct thread_struct *prev, +static inline void switch_to_bitmap(struct thread_struct *prev, struct thread_struct *next, unsigned long tifp, unsigned long tifn) { + struct tss_struct *tss = this_cpu_ptr(&cpu_tss); + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. @@ -374,8 +377,7 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; unsigned long tifp, tifn; @@ -385,7 +387,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(tss, prev, next, tifp, tifn); + switch_to_bitmap(prev, next, tifp, tifn); propagate_user_return_notify(prev_p, next_p); diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h new file mode 100644 index 000000000000..020fbfac3a27 --- /dev/null +++ b/arch/x86/kernel/process.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Code shared between 32 and 64 bit + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); + +/* + * This needs to be inline to optimize for the common case where no extra + * work needs to be done. + */ +static inline void switch_to_extra(struct task_struct *prev, + struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long prev_tif = task_thread_info(prev)->flags; + + /* + * __switch_to_xtra() handles debug registers, i/o bitmaps, + * speculation mitigations etc. + */ + if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || + prev_tif & _TIF_WORK_CTXSW_PREV)) + __switch_to_xtra(prev, next); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index bd7be8efdc4c..912246fd6cd9 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,6 +55,8 @@ #include #include +#include "process.h" + void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -264,12 +266,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); /* * Leave lazy mode, flushing any hypercalls made here. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a2661814bde0..81eec65fe053 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,6 +51,8 @@ #include #include +#include "process.h" + __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); /* Prints also some state that isn't saved in the pt_regs */ @@ -454,12 +456,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload esp0 and ss1. This changes current_thread_info(). */ load_sp0(tss, next); - /* - * Now maybe reload the debug registers and handle I/O bitmaps - */ - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || - task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); #ifdef CONFIG_XEN /* From 1cca4d2637791c2bcefc86c532339cf2918023d7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:48 +0100 Subject: [PATCH 40/77] x86/speculation: Avoid __switch_to_xtra() calls commit 5635d99953f04b550738f6f4c1c532667c3fd872 upstream. The TIF_SPEC_IB bit does not need to be evaluated in the decision to invoke __switch_to_xtra() when: - CONFIG_SMP is disabled - The conditional STIPB mode is disabled The TIF_SPEC_IB bit still controls IBPB in both cases so the TIF work mask checks might invoke __switch_to_xtra() for nothing if TIF_SPEC_IB is the only set bit in the work masks. Optimize it out by masking the bit at compile time for CONFIG_SMP=n and at run time when the static key controlling the conditional STIBP mode is disabled. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185005.374062201@linutronix.de [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/thread_info.h | 13 +++++++++++-- arch/x86/kernel/process.h | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 389b95bb1456..cd3f6be025be 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -142,9 +142,18 @@ struct thread_info { _TIF_NOHZ) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ +#define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD|_TIF_SPEC_IB) + _TIF_SSBD) + +/* + * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. + */ +#ifdef CONFIG_SMP +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB) +#else +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) +#endif #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h index 020fbfac3a27..898e97cf6629 100644 --- a/arch/x86/kernel/process.h +++ b/arch/x86/kernel/process.h @@ -2,6 +2,8 @@ // // Code shared between 32 and 64 bit +#include + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); /* @@ -14,6 +16,19 @@ static inline void switch_to_extra(struct task_struct *prev, unsigned long next_tif = task_thread_info(next)->flags; unsigned long prev_tif = task_thread_info(prev)->flags; + if (IS_ENABLED(CONFIG_SMP)) { + /* + * Avoid __switch_to_xtra() invocation when conditional + * STIPB is disabled and the only different bit is + * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not + * in the TIF_WORK_CTXSW masks. + */ + if (!static_branch_likely(&switch_to_cond_stibp)) { + prev_tif &= ~_TIF_SPEC_IB; + next_tif &= ~_TIF_SPEC_IB; + } + } + /* * __switch_to_xtra() handles debug registers, i/o bitmaps, * speculation mitigations etc. From c89ef65578170416a225d2b3a6c7299a8d0bcf7c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:49 +0100 Subject: [PATCH 41/77] x86/speculation: Prepare for conditional IBPB in switch_mm() commit 4c71a2b6fd7e42814aa68a6dec88abf3b42ea573 upstream. The IBPB speculation barrier is issued from switch_mm() when the kernel switches to a user space task with a different mm than the user space task which ran last on the same CPU. An additional optimization is to avoid IBPB when the incoming task can be ptraced by the outgoing task. This optimization only works when switching directly between two user space tasks. When switching from a kernel task to a user space task the optimization fails because the previous task cannot be accessed anymore. So for quite some scenarios the optimization is just adding overhead. The upcoming conditional IBPB support will issue IBPB only for user space tasks which have the TIF_SPEC_IB bit set. This requires to handle the following cases: 1) Switch from a user space task (potential attacker) which has TIF_SPEC_IB set to a user space task (potential victim) which has TIF_SPEC_IB not set. 2) Switch from a user space task (potential attacker) which has TIF_SPEC_IB not set to a user space task (potential victim) which has TIF_SPEC_IB set. This needs to be optimized for the case where the IBPB can be avoided when only kernel threads ran in between user space tasks which belong to the same process. The current check whether two tasks belong to the same context is using the tasks context id. While correct, it's simpler to use the mm pointer because it allows to mangle the TIF_SPEC_IB bit into it. The context id based mechanism requires extra storage, which creates worse code. When a task is scheduled out its TIF_SPEC_IB bit is mangled as bit 0 into the per CPU storage which is used to track the last user space mm which was running on a CPU. This bit can be used together with the TIF_SPEC_IB bit of the incoming task to make the decision whether IBPB needs to be issued or not to cover the two cases above. As conditional IBPB is going to be the default, remove the dubious ptrace check for the IBPB always case and simply issue IBPB always when the process changes. Move the storage to a different place in the struct as the original one created a hole. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185005.466447057@linutronix.de [bwh: Backported to 4.9: - Drop changes in initialize_tlbstate_and_flush() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 2 + arch/x86/include/asm/tlbflush.h | 8 +- arch/x86/kernel/cpu/bugs.c | 29 +++++-- arch/x86/mm/tlb.c | 113 ++++++++++++++++++++------- 4 files changed, 117 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index dab01da02de4..e655341bffe9 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -303,6 +303,8 @@ do { \ } while (0) DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 686a58d793e5..f5ca15622dc9 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -68,8 +68,12 @@ static inline void invpcid_flush_all_nonglobals(void) struct tlb_state { struct mm_struct *active_mm; int state; - /* last user mm's ctx id */ - u64 last_ctx_id; + + /* Last user mm for optimizing IBPB */ + union { + struct mm_struct *last_user_mm; + unsigned long last_user_mm_ibpb; + }; /* * Access to this CR4 shadow and to H/W CR4 is protected by diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c52d5596d943..8d92f87f218f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -54,6 +54,10 @@ u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; /* Control conditional STIPB in switch_to() */ DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); +/* Control conditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +/* Control unconditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); void __init check_bugs(void) { @@ -329,7 +333,17 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + + switch (mode) { + case SPECTRE_V2_USER_STRICT: + static_branch_enable(&switch_mm_always_ibpb); + break; + default: + break; + } + + pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", + mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ @@ -961,10 +975,15 @@ static char *stibp_state(void) static char *ibpb_state(void) { - if (boot_cpu_has(X86_FEATURE_USE_IBPB)) - return ", IBPB"; - else - return ""; + if (boot_cpu_has(X86_FEATURE_IBPB)) { + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", IBPB: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", IBPB: always-on"; + } + } + return ""; } static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ff8f8e529317..a112bb175dd4 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -31,6 +30,12 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +/* + * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is + * stored in cpu_tlb_state.last_user_mm_ibpb. + */ +#define LAST_USER_MM_IBPB 0x1UL + atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); struct flush_tlb_info { @@ -102,17 +107,87 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, local_irq_restore(flags); } -static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) { + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; + + return (unsigned long)next->mm | ibpb; +} + +static void cond_ibpb(struct task_struct *next) +{ + if (!next || !next->mm) + return; + /* - * Check if the current (previous) task has access to the memory - * of the @tsk (next) task. If access is denied, make sure to - * issue a IBPB to stop user->user Spectre-v2 attacks. - * - * Note: __ptrace_may_access() returns 0 or -ERRNO. + * Both, the conditional and the always IBPB mode use the mm + * pointer to avoid the IBPB when switching between tasks of the + * same process. Using the mm pointer instead of mm->context.ctx_id + * opens a hypothetical hole vs. mm_struct reuse, which is more or + * less impossible to control by an attacker. Aside of that it + * would only affect the first schedule so the theoretically + * exposed data is not really interesting. */ - return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && - ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); + if (static_branch_likely(&switch_mm_cond_ibpb)) { + unsigned long prev_mm, next_mm; + + /* + * This is a bit more complex than the always mode because + * it has to handle two cases: + * + * 1) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB set to a user space task + * (potential victim) which has TIF_SPEC_IB not set. + * + * 2) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB not set to a user space task + * (potential victim) which has TIF_SPEC_IB set. + * + * This could be done by unconditionally issuing IBPB when + * a task which has TIF_SPEC_IB set is either scheduled in + * or out. Though that results in two flushes when: + * + * - the same user space task is scheduled out and later + * scheduled in again and only a kernel thread ran in + * between. + * + * - a user space task belonging to the same process is + * scheduled in after a kernel thread ran in between + * + * - a user space task belonging to the same process is + * scheduled in immediately. + * + * Optimize this with reasonably small overhead for the + * above cases. Mangle the TIF_SPEC_IB bit into the mm + * pointer of the incoming task which is stored in + * cpu_tlbstate.last_user_mm_ibpb for comparison. + */ + next_mm = mm_mangle_tif_spec_ib(next); + prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); + + /* + * Issue IBPB only if the mm's are different and one or + * both have the IBPB bit set. + */ + if (next_mm != prev_mm && + (next_mm | prev_mm) & LAST_USER_MM_IBPB) + indirect_branch_prediction_barrier(); + + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); + } + + if (static_branch_unlikely(&switch_mm_always_ibpb)) { + /* + * Only flush when switching to a user space task with a + * different context than the user space task which ran + * last on this CPU. + */ + if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { + indirect_branch_prediction_barrier(); + this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); + } + } } void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, @@ -121,22 +196,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, unsigned cpu = smp_processor_id(); if (likely(prev != next)) { - u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); - /* * Avoid user/user BTB poisoning by flushing the branch * predictor when switching between processes. This stops * one process from doing Spectre-v2 attacks on another. - * - * As an optimization, flush indirect branches only when - * switching into a processes that can't be ptrace by the - * current one (as in such case, attacker has much more - * convenient way how to tamper with the next process than - * branch buffer poisoning). */ - if (static_cpu_has(X86_FEATURE_USE_IBPB) && - ibpb_needed(tsk, last_ctx_id)) - indirect_branch_prediction_barrier(); + cond_ibpb(tsk); if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* @@ -152,14 +217,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, set_pgd(pgd, init_mm.pgd[stack_pgd_index]); } - /* - * Record last user mm's context id, so we can avoid - * flushing branch buffer with IBPB if we switch back - * to the same user. - */ - if (next != &init_mm) - this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); - this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); this_cpu_write(cpu_tlbstate.active_mm, next); From 607a3b3bbd5ba62a3d004e92d2149e040086c498 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:51 +0100 Subject: [PATCH 42/77] x86/speculation: Split out TIF update commit e6da8bb6f9abb2628381904b24163c770e630bac upstream. The update of the TIF_SSBD flag and the conditional speculation control MSR update is done in the ssb_prctl_set() function directly. The upcoming prctl support for controlling indirect branch speculation via STIBP needs the same mechanism. Split the code out and make it reusable. Reword the comment about updates for other tasks. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185005.652305076@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8d92f87f218f..0d3f346c68f8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -707,10 +707,29 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) { bool update; + if (on) + update = !test_and_set_tsk_thread_flag(tsk, tifbit); + else + update = test_and_clear_tsk_thread_flag(tsk, tifbit); + + /* + * Immediately update the speculation control MSRs for the current + * task, but for a non-current task delay setting the CPU + * mitigation until it is scheduled next. + * + * This can only happen for SECCOMP mitigation. For PRCTL it's + * always the current task. + */ + if (tsk == current && update) + speculation_ctrl_update_current(); +} + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; @@ -721,28 +740,20 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, false); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; default: return -ERANGE; } - - /* - * If being set on non-current task, delay setting the CPU - * mitigation until it is next scheduled. - */ - if (task == current && update) - speculation_ctrl_update_current(); - return 0; } From 6596ca955bf6d04fe2961215f22f84c13ca7217f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:52 +0100 Subject: [PATCH 43/77] x86/speculation: Prepare arch_smt_update() for PRCTL mode commit 6893a959d7fdebbab5f5aa112c277d5a44435ba1 upstream. The upcoming fine grained per task STIBP control needs to be updated on CPU hotplug as well. Split out the code which controls the strict mode so the prctl control code can be added later. Mark the SMP function call argument __unused while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185005.759457117@linutronix.de Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 50 ++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0d3f346c68f8..fbf491447cf9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -535,40 +535,44 @@ specv2_set_mode: arch_smt_update(); } -static bool stibp_needed(void) -{ - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - /* Check for strict user mitigation mode */ - return spectre_v2_user == SPECTRE_V2_USER_STRICT; -} - -static void update_stibp_msr(void *info) +static void update_stibp_msr(void * __unused) { wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } +/* Update x86_spec_ctrl_base in case SMT state changed. */ +static void update_stibp_strict(void) +{ + u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask == x86_spec_ctrl_base) + return; + + pr_info("Update user space SMT mitigation: STIBP %s\n", + mask & SPEC_CTRL_STIBP ? "always-on" : "off"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); +} + void arch_smt_update(void) { - u64 mask; - - if (!stibp_needed()) + /* Enhanced IBRS implies STIBP. No update required. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + update_stibp_strict(); + break; } + mutex_unlock(&spec_ctrl_mutex); } From 6febf94d190c1cf977247fe4519a01f0828b68ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Nov 2018 10:56:57 +0100 Subject: [PATCH 44/77] x86/speculation: Prevent stale SPEC_CTRL msr content commit 6d991ba509ebcfcc908e009d1db51972a4f7a064 upstream. The seccomp speculation control operates on all tasks of a process, but only the current task of a process can update the MSR immediately. For the other threads the update is deferred to the next context switch. This creates the following situation with Process A and B: Process A task 2 and Process B task 1 are pinned on CPU1. Process A task 2 does not have the speculation control TIF bit set. Process B task 1 has the speculation control TIF bit set. CPU0 CPU1 MSR bit is set ProcB.T1 schedules out ProcA.T2 schedules in MSR bit is cleared ProcA.T1 seccomp_update() set TIF bit on ProcA.T2 ProcB.T1 schedules in MSR is not updated <-- FAIL This happens because the context switch code tries to avoid the MSR update if the speculation control TIF bits of the incoming and the outgoing task are the same. In the worst case ProcB.T1 and ProcA.T2 are the only tasks scheduling back and forth on CPU1, which keeps the MSR stale forever. In theory this could be remedied by IPIs, but chasing the remote task which could be migrated is complex and full of races. The straight forward solution is to avoid the asychronous update of the TIF bit and defer it to the next context switch. The speculation control state is stored in task_struct::atomic_flags by the prctl and seccomp updates already. Add a new TIF_SPEC_FORCE_UPDATE bit and set this after updating the atomic_flags. Check the bit on context switch and force a synchronous update of the speculation control if set. Use the same mechanism for updating the current task. Reported-by: Tim Chen Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1811272247140.1875@nanos.tec.linutronix.de [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/spec-ctrl.h | 6 +----- arch/x86/include/asm/thread_info.h | 4 +++- arch/x86/kernel/cpu/bugs.c | 18 +++++++----------- arch/x86/kernel/process.c | 30 +++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 27b0bce3933b..5393babc0598 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -83,10 +83,6 @@ static inline void speculative_store_bypass_ht_init(void) { } #endif extern void speculation_ctrl_update(unsigned long tif); - -static inline void speculation_ctrl_update_current(void) -{ - speculation_ctrl_update(current_thread_info()->flags); -} +extern void speculation_ctrl_update_current(void); #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index cd3f6be025be..0438f7fbb383 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -88,6 +88,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -113,6 +114,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SPEC_IB (1 << TIF_SPEC_IB) +#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -144,7 +146,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD) + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fbf491447cf9..d5c501d19828 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -711,14 +711,10 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) +static void task_update_spec_tif(struct task_struct *tsk) { - bool update; - - if (on) - update = !test_and_set_tsk_thread_flag(tsk, tifbit); - else - update = test_and_clear_tsk_thread_flag(tsk, tifbit); + /* Force the update of the real TIF bits */ + set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE); /* * Immediately update the speculation control MSRs for the current @@ -728,7 +724,7 @@ static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) * This can only happen for SECCOMP mitigation. For PRCTL it's * always the current task. */ - if (tsk == current && update) + if (tsk == current) speculation_ctrl_update_current(); } @@ -744,16 +740,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, false); + task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; default: return -ERANGE; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b1abe87c5f4d..cb58f39e1c9e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -369,6 +369,18 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, wrmsrl(MSR_IA32_SPEC_CTRL, msr); } +static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) +{ + if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { + if (task_spec_ssb_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SSBD); + else + clear_tsk_thread_flag(tsk, TIF_SSBD); + } + /* Return the updated threadinfo flags*/ + return task_thread_info(tsk)->flags; +} + void speculation_ctrl_update(unsigned long tif) { /* Forced update. Make sure all relevant TIF flags are different */ @@ -377,6 +389,14 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } +/* Called from seccomp/prctl update */ +void speculation_ctrl_update_current(void) +{ + preempt_disable(); + speculation_ctrl_update(speculation_ctrl_update_tif(current)); + preempt_enable(); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; @@ -405,7 +425,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) if ((tifp ^ tifn) & _TIF_NOTSC) cr4_toggle_bits(X86_CR4_TSD); - __speculation_ctrl_update(tifp, tifn); + if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { + __speculation_ctrl_update(tifp, tifn); + } else { + speculation_ctrl_update_tif(prev_p); + tifn = speculation_ctrl_update_tif(next_p); + + /* Enforce MSR update to ensure consistent state */ + __speculation_ctrl_update(~tifn, tifn); + } } /* From 2d99bc055e458eaaf78e4901e78961546eecf5f4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:53 +0100 Subject: [PATCH 45/77] x86/speculation: Add prctl() control for indirect branch speculation commit 9137bb27e60e554dab694eafa4cca241fa3a694f upstream. Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of indirect branch speculation via STIBP and IBPB. Invocations: Check indirect branch speculation status with - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); Enable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); Disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); Force disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); See Documentation/userspace-api/spec_ctrl.rst. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de [bwh: Backported to 4.9: - Renumber the PFA flags - Drop changes in tools/include/uapi/linux/prctl.h - Adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/spec_ctrl.txt | 9 ++++ arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 67 ++++++++++++++++++++++++++++ arch/x86/kernel/process.c | 5 +++ include/linux/sched.h | 9 ++++ include/uapi/linux/prctl.h | 1 + 6 files changed, 92 insertions(+) diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt index 32f3d55c54b7..c4dbe6f7cdae 100644 --- a/Documentation/spec_ctrl.txt +++ b/Documentation/spec_ctrl.txt @@ -92,3 +92,12 @@ Speculation misfeature controls * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); + +- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes + (Mitigate Spectre V2 style attacks against user processes) + + Invocations: + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e655341bffe9..37ea761495c5 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -223,6 +223,7 @@ enum spectre_v2_mitigation { enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_PRCTL, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d5c501d19828..3308f18a637d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -571,6 +571,8 @@ void arch_smt_update(void) case SPECTRE_V2_USER_STRICT: update_stibp_strict(); break; + case SPECTRE_V2_USER_PRCTL: + break; } mutex_unlock(&spec_ctrl_mutex); @@ -757,12 +759,50 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return 0; + /* + * Indirect branch speculation is always disabled in strict + * mode. + */ + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return -EPERM; + task_clear_spec_ib_disable(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + /* + * Indirect branch speculation is always allowed when + * mitigation is force disabled. + */ + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return -EPERM; + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return 0; + task_set_spec_ib_disable(task); + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); + task_update_spec_tif(task); + break; + default: + return -ERANGE; + } + return 0; +} + int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_set(task, ctrl); default: return -ENODEV; } @@ -795,11 +835,34 @@ static int ssb_prctl_get(struct task_struct *task) } } +static int ib_prctl_get(struct task_struct *task) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return PR_SPEC_NOT_AFFECTED; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return PR_SPEC_ENABLE; + case SPECTRE_V2_USER_PRCTL: + if (task_spec_ib_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ib_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case SPECTRE_V2_USER_STRICT: + return PR_SPEC_DISABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_get(task); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_get(task); default: return -ENODEV; } @@ -980,6 +1043,8 @@ static char *stibp_state(void) return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; + case SPECTRE_V2_USER_PRCTL: + return ""; } return ""; } @@ -992,6 +1057,8 @@ static char *ibpb_state(void) return ", IBPB: disabled"; case SPECTRE_V2_USER_STRICT: return ", IBPB: always-on"; + case SPECTRE_V2_USER_PRCTL: + return ""; } } return ""; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cb58f39e1c9e..2e4eab22ca37 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -376,6 +376,11 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_SSBD); else clear_tsk_thread_flag(tsk, TIF_SSBD); + + if (task_spec_ib_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SPEC_IB); + else + clear_tsk_thread_flag(tsk, TIF_SPEC_IB); } /* Return the updated threadinfo flags*/ return task_thread_info(tsk)->flags; diff --git a/include/linux/sched.h b/include/linux/sched.h index ebd0afb35d16..1c487a3abd84 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2357,6 +2357,8 @@ static inline void memalloc_noio_restore(unsigned int flags) #define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */ #define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/ +#define PFA_SPEC_IB_DISABLE 6 /* Indirect branch speculation restricted */ +#define PFA_SPEC_IB_FORCE_DISABLE 7 /* Indirect branch speculation permanently restricted */ #define TASK_PFA_TEST(name, func) \ @@ -2390,6 +2392,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) + +TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) +TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) + /* * task->jobctl flags */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 64776b72e1eb..64ec0d62e5f5 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -202,6 +202,7 @@ struct prctl_mm_map { #define PR_SET_SPECULATION_CTRL 53 /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) From 91d9bbd3e4bdb0494ba1d2922646cabb6b8e6e2b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:54 +0100 Subject: [PATCH 46/77] x86/speculation: Enable prctl mode for spectre_v2_user commit 7cc765a67d8e04ef7d772425ca5a2a1e2b894c15 upstream. Now that all prerequisites are in place: - Add the prctl command line option - Default the 'auto' mode to 'prctl' - When SMT state changes, update the static key which controls the conditional STIBP evaluation on context switch. - At init update the static key which controls the conditional IBPB evaluation on context switch. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185005.958421388@linutronix.de [bwh: Backported to 4.9: adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 7 ++++- arch/x86/kernel/cpu/bugs.c | 41 ++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cef2cb9cbf8a..f87e4bf2ab25 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -4075,9 +4075,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. off - Unconditionally disable mitigations. Is enforced by spectre_v2=off + prctl - Indirect branch speculation is enabled, + but mitigation can be enabled via prctl + per thread. The mitigation control state + is inherited on fork. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. - Default is off. + Default is prctl. Not specifying this option is equivalent to spectre_v2_user=auto. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3308f18a637d..e28598f3ab12 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -253,11 +253,13 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_NONE, SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, + SPECTRE_V2_USER_CMD_PRCTL, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", + [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", }; static const struct { @@ -268,6 +270,7 @@ static const struct { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -322,12 +325,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) smt_possible = false; switch (spectre_v2_parse_user_cmdline(v2_cmd)) { - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_PRCTL: + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -338,6 +344,9 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_STRICT: static_branch_enable(&switch_mm_always_ibpb); break; + case SPECTRE_V2_USER_PRCTL: + static_branch_enable(&switch_mm_cond_ibpb); + break; default: break; } @@ -350,6 +359,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; + /* + * If SMT is not possible or STIBP is not available clear the STIPB + * mode. + */ + if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + mode = SPECTRE_V2_USER_NONE; set_mode: spectre_v2_user = mode; /* Only print the STIBP mode when SMT possible */ @@ -557,6 +572,15 @@ static void update_stibp_strict(void) on_each_cpu(update_stibp_msr, NULL, 1); } +/* Update the static key controlling the evaluation of TIF_SPEC_IB */ +static void update_indir_branch_cond(void) +{ + if (sched_smt_active()) + static_branch_enable(&switch_to_cond_stibp); + else + static_branch_disable(&switch_to_cond_stibp); +} + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -572,6 +596,7 @@ void arch_smt_update(void) update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: + update_indir_branch_cond(); break; } @@ -1044,7 +1069,8 @@ static char *stibp_state(void) case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; case SPECTRE_V2_USER_PRCTL: - return ""; + if (static_key_enabled(&switch_to_cond_stibp)) + return ", STIBP: conditional"; } return ""; } @@ -1052,14 +1078,11 @@ static char *stibp_state(void) static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: - return ", IBPB: disabled"; - case SPECTRE_V2_USER_STRICT: + if (static_key_enabled(&switch_mm_always_ibpb)) return ", IBPB: always-on"; - case SPECTRE_V2_USER_PRCTL: - return ""; - } + if (static_key_enabled(&switch_mm_cond_ibpb)) + return ", IBPB: conditional"; + return ", IBPB: disabled"; } return ""; } From 6f4b925ec2943ee6054658eb06fa7a68927486a9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:55 +0100 Subject: [PATCH 47/77] x86/speculation: Add seccomp Spectre v2 user space protection mode commit 6b3e64c237c072797a9ec918654a60e3a46488e2 upstream. If 'prctl' mode of user space protection from spectre v2 is selected on the kernel command-line, STIBP and IBPB are applied on tasks which restrict their indirect branch speculation via prctl. SECCOMP enables the SSBD mitigation for sandboxed tasks already, so it makes sense to prevent spectre v2 user space to user space attacks as well. The Intel mitigation guide documents how STIPB works: Setting bit 1 (STIBP) of the IA32_SPEC_CTRL MSR on a logical processor prevents the predicted targets of indirect branches on any logical processor of that core from being controlled by software that executes (or executed previously) on another logical processor of the same core. Ergo setting STIBP protects the task itself from being attacked from a task running on a different hyper-thread and protects the tasks running on different hyper-threads from being attacked. While the document suggests that the branch predictors are shielded between the logical processors, the observed performance regressions suggest that STIBP simply disables the branch predictor more or less completely. Of course the document wording is vague, but the fact that there is also no requirement for issuing IBPB when STIBP is used points clearly in that direction. The kernel still issues IBPB even when STIBP is used until Intel clarifies the whole mechanism. IBPB is issued when the task switches out, so malicious sandbox code cannot mistrain the branch predictor for the next user space task on the same logical processor. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185006.051663132@linutronix.de [bwh: Backported to 4.9: adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 9 ++++++++- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f87e4bf2ab25..65a6e663f487 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -4080,9 +4080,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. per thread. The mitigation control state is inherited on fork. + seccomp + - Same as "prctl" above, but all seccomp + threads will enable the mitigation unless + they explicitly opt out. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. - Default is prctl. + + Default mitigation: + If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl" Not specifying this option is equivalent to spectre_v2_user=auto. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 37ea761495c5..14bf299b369f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -224,6 +224,7 @@ enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, SPECTRE_V2_USER_PRCTL, + SPECTRE_V2_USER_SECCOMP, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e28598f3ab12..00263ee4f6a1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -254,12 +254,14 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_SECCOMP, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", + [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", }; static const struct { @@ -271,6 +273,7 @@ static const struct { { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -330,10 +333,16 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_PRCTL: mode = SPECTRE_V2_USER_PRCTL; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPECTRE_V2_USER_SECCOMP; + else + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -345,6 +354,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) static_branch_enable(&switch_mm_always_ibpb); break; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; default: @@ -596,6 +606,7 @@ void arch_smt_update(void) update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: update_indir_branch_cond(); break; } @@ -838,6 +849,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -869,6 +882,7 @@ static int ib_prctl_get(struct task_struct *task) case SPECTRE_V2_USER_NONE: return PR_SPEC_ENABLE; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) @@ -1069,6 +1083,7 @@ static char *stibp_state(void) case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) return ", STIBP: conditional"; } From e58cf37a3c2e102af3f28a7bd24bc5aa03c75564 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:56 +0100 Subject: [PATCH 48/77] x86/speculation: Provide IBPB always command line options commit 55a974021ec952ee460dc31ca08722158639de72 upstream. Provide the possibility to enable IBPB always in combination with 'prctl' and 'seccomp'. Add the extra command line options and rework the IBPB selection to evaluate the command instead of the mode selected by the STIPB switch case. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Link: https://lkml.kernel.org/r/20181125185006.144047038@linutronix.de [bwh: Backported to 4.9: adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 12 ++++++++++ arch/x86/kernel/cpu/bugs.c | 34 +++++++++++++++++++---------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 65a6e663f487..9e5560f52e9b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -4080,11 +4080,23 @@ bytes respectively. Such letter suffixes can also be entirely omitted. per thread. The mitigation control state is inherited on fork. + prctl,ibpb + - Like "prctl" above, but only STIBP is + controlled per thread. IBPB is issued + always when switching between different user + space processes. + seccomp - Same as "prctl" above, but all seccomp threads will enable the mitigation unless they explicitly opt out. + seccomp,ibpb + - Like "seccomp" above, but only STIBP is + controlled per thread. IBPB is issued + always when switching between different + user space processes. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 00263ee4f6a1..a7e9a93e387a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -254,7 +254,9 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_PRCTL_IBPB, SPECTRE_V2_USER_CMD_SECCOMP, + SPECTRE_V2_USER_CMD_SECCOMP_IBPB, }; static const char * const spectre_v2_user_strings[] = { @@ -269,11 +271,13 @@ static const struct { enum spectre_v2_user_cmd cmd; bool secure; } v2_user_options[] __initdata = { - { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, - { "off", SPECTRE_V2_USER_CMD_NONE, false }, - { "on", SPECTRE_V2_USER_CMD_FORCE, true }, - { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, - { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -319,6 +323,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); + enum spectre_v2_user_cmd cmd; if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) return; @@ -327,17 +332,20 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + cmd = spectre_v2_parse_user_cmdline(v2_cmd); + switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: mode = SPECTRE_V2_USER_PRCTL; break; case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_SECCOMP: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: if (IS_ENABLED(CONFIG_SECCOMP)) mode = SPECTRE_V2_USER_SECCOMP; else @@ -349,12 +357,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - switch (mode) { - case SPECTRE_V2_USER_STRICT: + switch (cmd) { + case SPECTRE_V2_USER_CMD_FORCE: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); break; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; default: @@ -362,7 +373,8 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", - mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); + static_key_enabled(&switch_mm_always_ibpb) ? + "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ From 6198041f012eda83d6cfb28912db6061ee2702b4 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 5 Dec 2018 17:19:56 -0200 Subject: [PATCH 49/77] kvm: x86: Report STIBP on GET_SUPPORTED_CPUID commit d7b09c827a6cf291f66637a36f46928dd1423184 upstream. Months ago, we have added code to allow direct access to MSR_IA32_SPEC_CTRL to the guest, which makes STIBP available to guests. This was implemented by commits d28b387fb74d ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL") and b2ac58f90540 ("KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL"). However, we never updated GET_SUPPORTED_CPUID to let userspace know that STIBP can be enabled in CPUID. Fix that by updating kvm_cpuid_8000_0008_ebx_x86_features and kvm_cpuid_7_0_edx_x86_features. Signed-off-by: Eduardo Habkost Reviewed-by: Jim Mattson Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b6435f3be254..82c670b7bda1 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -356,7 +356,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | - F(AMD_SSB_NO); + F(AMD_SSB_NO) | F(AMD_STIBP); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -381,7 +381,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = - F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); + F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | + F(INTEL_STIBP); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); From b76f8af91206a12a68773d3c86f3f343d611deb0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Feb 2019 12:36:50 +0100 Subject: [PATCH 50/77] x86/msr-index: Cleanup bit defines commit d8eabc37310a92df40d07c5a8afc53cebf996716 upstream. Greg pointed out that speculation related bit defines are using (1 << N) format instead of BIT(N). Aside of that (1 << N) is wrong as it should use 1UL at least. Clean it up. [ Josh Poimboeuf: Fix tools build ] Reported-by: Greg Kroah-Hartman Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters [bwh: Backported to 4.9: Drop change to x86_energy_perf_policy, which doesn't use msr-index.h here] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 34 ++++++++++++++++-------------- tools/power/x86/turbostat/Makefile | 2 +- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d7813b118ac8..dc582c10586c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H +#include + /* * CPU model specific register (MSR) numbers. * @@ -38,14 +40,14 @@ /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 @@ -62,20 +64,20 @@ #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ -#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. - */ +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b -#define L1D_FLUSH (1 << 0) /* - * Writeback and invalidate the - * L1 data cache. - */ +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 8561e7ddca59..92be948c922d 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -8,7 +8,7 @@ ifeq ("$(origin O)", "command line") endif turbostat : turbostat.c -CFLAGS += -Wall +CFLAGS += -Wall -I../../../include CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' %: %.c From d5272d01ef727181cbc36292bc02425e6993ef5b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Feb 2019 10:10:23 +0100 Subject: [PATCH 51/77] x86/speculation: Consolidate CPU whitelists commit 36ad35131adacc29b328b9c8b6277a8bf0d6fd5d upstream. The CPU vulnerability whitelists have some overlap and there are more whitelists coming along. Use the driver_data field in the x86_cpu_id struct to denote the whitelisted vulnerabilities and combine all whitelists into one. Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 105 +++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 49 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cf3ee870f928..e48e2463720c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -891,60 +891,68 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) c->x86_cache_bits = c->x86_phys_bits; } -static const __initconst struct x86_cpu_id cpu_no_speculation[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY }, - { X86_VENDOR_CENTAUR, 5 }, - { X86_VENDOR_INTEL, 5 }, - { X86_VENDOR_NSC, 5 }, - { X86_VENDOR_ANY, 4 }, +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) + +#define VULNWL(_vendor, _family, _model, _whitelist) \ + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } + +#define VULNWL_INTEL(model, whitelist) \ + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) + +#define VULNWL_AMD(family, whitelist) \ + VULNWL(AMD, family, X86_MODEL_ANY, whitelist) + +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), + + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF), + + VULNWL_INTEL(CORE_YONAH, NO_SSB), + + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT, NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_L1TF), + + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), {} }; -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { - { X86_VENDOR_AMD }, - {} -}; +static bool __init cpu_matches(unsigned long which) +{ + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); -/* Only list CPUs which speculate but are non susceptible to SSB */ -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - { X86_VENDOR_AMD, 0x12, }, - { X86_VENDOR_AMD, 0x11, }, - { X86_VENDOR_AMD, 0x10, }, - { X86_VENDOR_AMD, 0xf, }, - {} -}; - -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { - /* in addition to cpu_no_speculation */ - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - {} -}; + return m && !!(m->driver_data & which); +} static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (x86_match_cpu(cpu_no_speculation)) + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); @@ -953,15 +961,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (x86_match_cpu(cpu_no_meltdown)) + if (cpu_matches(NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -970,7 +977,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (x86_match_cpu(cpu_no_l1tf)) + if (cpu_matches(NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); From fbf6ad08fd9ba697f6d127dbf089739fecbd433e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 18 Jan 2019 16:50:16 -0800 Subject: [PATCH 52/77] x86/speculation/mds: Add basic bug infrastructure for MDS commit ed5194c2732c8084af9fd159c146ea92bf137128 upstream. Microarchitectural Data Sampling (MDS), is a class of side channel attacks on internal buffers in Intel CPUs. The variants are: - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a dependent load (store-to-load forwarding) as an optimization. The forward can also happen to a faulting or assisting load operation for a different memory address, which can be exploited under certain conditions. Store buffers are partitioned between Hyper-Threads so cross thread forwarding is not possible. But if a thread enters or exits a sleep state the store buffer is repartitioned which can expose data from one thread to the other. MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage L1 miss situations and to hold data which is returned or sent in response to a memory or I/O operation. Fill buffers can forward data to a load operation and also write data to the cache. When the fill buffer is deallocated it can retain the stale data of the preceding operations which can then be forwarded to a faulting or assisting load operation, which can be exploited under certain conditions. Fill buffers are shared between Hyper-Threads so cross thread leakage is possible. MLDPS leaks Load Port Data. Load ports are used to perform load operations from memory or I/O. The received data is then forwarded to the register file or a subsequent operation. In some implementations the Load Port can contain stale data from a previous operation which can be forwarded to faulting or assisting loads under certain conditions, which again can be exploited eventually. Load ports are shared between Hyper-Threads so cross thread leakage is possible. All variants have the same mitigation for single CPU thread case (SMT off), so the kernel can treat them as one MDS issue. Add the basic infrastructure to detect if the current CPU is affected by MDS. [ tglx: Rewrote changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters [bwh: Backported to 4.9: adjust context, indentation] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/msr-index.h | 5 +++++ arch/x86/kernel/cpu/common.c | 23 +++++++++++++++-------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 234d74186046..c7510dbdf238 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -317,6 +317,7 @@ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ @@ -354,5 +355,6 @@ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index dc582c10586c..38f94d07920d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -72,6 +72,11 @@ * attack, so no Speculative Store Bypass * control required. */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e48e2463720c..9513280b84fa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -895,6 +895,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_MELTDOWN BIT(1) #define NO_SSB BIT(2) #define NO_L1TF BIT(3) +#define NO_MDS BIT(4) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -911,6 +912,7 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + /* Intel Family 6 */ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), @@ -927,17 +929,19 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(CORE_YONAH, NO_SSB), VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT, NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_X, NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_L1TF), - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), + + /* AMD Family 0xf - 0x12 */ + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), {} }; @@ -968,6 +972,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) + setup_force_cpu_bug(X86_BUG_MDS); + if (cpu_matches(NO_MELTDOWN)) return; From 1cdffecc34ba5d5af61b456fb0f46abbb3a86816 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 1 Mar 2019 20:21:08 +0100 Subject: [PATCH 53/77] x86/speculation/mds: Add BUG_MSBDS_ONLY commit e261f209c3666e842fd645a1e31f001c3a26def9 upstream. This bug bit is set on CPUs which are only affected by Microarchitectural Store Buffer Data Sampling (MSBDS) and not by any other MDS variant. This is important because the Store Buffers are partitioned between Hyper-Threads so cross thread forwarding is not possible. But if a thread enters or exits a sleep state the store buffer is repartitioned which can expose data from one thread to the other. This transition can be mitigated. That means that for CPUs which are only affected by MSBDS SMT can be enabled, if the CPU is not affected by other SMT sensitive vulnerabilities, e.g. L1TF. The XEON PHI variants fall into that category. Also the Silvermont/Airmont ATOMs, but for them it's not really relevant as they do not support SMT, but mark them for completeness sake. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters [bwh: Backported to 4.9: adjust context, indentation] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/common.c | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c7510dbdf238..06de338be0d8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -356,5 +356,6 @@ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9513280b84fa..cda130dc56b9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -896,6 +896,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SSB BIT(2) #define NO_L1TF BIT(3) #define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -919,16 +920,16 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF), - VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF), + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), VULNWL_INTEL(CORE_YONAH, NO_SSB), - VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF), + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), @@ -972,8 +973,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); + if (cpu_matches(MSBDS_ONLY)) + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); + } if (cpu_matches(NO_MELTDOWN)) return; From eb2aa332cfe39e05585534017ad94b7717dbdf85 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 18 Jan 2019 16:50:23 -0800 Subject: [PATCH 54/77] x86/kvm: Expose X86_FEATURE_MD_CLEAR to guests commit 6c4dbbd14730c43f4ed808a9c42ca41625925c22 upstream. X86_FEATURE_MD_CLEAR is a new CPUID bit which is set when microcode provides the mechanism to invoke a flush of various exploitable CPU buffers by invoking the VERW instruction. Hand it through to guests so they can adjust their mitigations. This also requires corresponding qemu changes, which are available separately. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 82c670b7bda1..fc8236fd2495 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | - F(INTEL_STIBP); + F(INTEL_STIBP) | F(MD_CLEAR); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); From 96ef7afd8c38c88419d1bd85f6cc25c3aa403224 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 23:13:06 +0100 Subject: [PATCH 55/77] x86/speculation/mds: Add mds_clear_cpu_buffers() commit 6a9e529272517755904b7afa639f6db59ddb793e upstream. The Microarchitectural Data Sampling (MDS) vulernabilities are mitigated by clearing the affected CPU buffers. The mechanism for clearing the buffers uses the unused and obsolete VERW instruction in combination with a microcode update which triggers a CPU buffer clear when VERW is executed. Provide a inline function with the assembly magic. The argument of the VERW instruction must be a memory operand as documented: "MD_CLEAR enumerates that the memory-operand variant of VERW (for example, VERW m16) has been extended to also overwrite buffers affected by MDS. This buffer overwriting functionality is not guaranteed for the register operand variant of VERW." Documentation also recommends to use a writable data segment selector: "The buffer overwriting occurs regardless of the result of the VERW permission check, as well as when the selector is null or causes a descriptor load segment violation. However, for lowest latency we recommend using a selector that indicates a valid writable data segment." Add x86 specific documentation about MDS and the internal workings of the mitigation. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters [bwh: Backported to 4.9: add the "Architecture-specific documentation" section to the index] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/index.rst | 11 ++++ Documentation/x86/conf.py | 10 +++ Documentation/x86/index.rst | 8 +++ Documentation/x86/mds.rst | 99 ++++++++++++++++++++++++++++ arch/x86/include/asm/nospec-branch.h | 25 +++++++ 5 files changed, 153 insertions(+) create mode 100644 Documentation/x86/conf.py create mode 100644 Documentation/x86/index.rst create mode 100644 Documentation/x86/mds.rst diff --git a/Documentation/index.rst b/Documentation/index.rst index 213399aac757..18b2484d19ef 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -20,6 +20,17 @@ Contents: gpu/index 80211/index +Architecture-specific documentation +----------------------------------- + +These books provide programming details about architecture-specific +implementation. + +.. toctree:: + :maxdepth: 2 + + x86/index + Indices and tables ================== diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py new file mode 100644 index 000000000000..33c5c3142e20 --- /dev/null +++ b/Documentation/x86/conf.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8; mode: python -*- + +project = "X86 architecture specific documentation" + +tags.add("subproject") + +latex_documents = [ + ('index', 'x86.tex', project, + 'The kernel development community', 'manual'), +] diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst new file mode 100644 index 000000000000..ef389dcf1b1d --- /dev/null +++ b/Documentation/x86/index.rst @@ -0,0 +1,8 @@ +========================== +x86 architecture specifics +========================== + +.. toctree:: + :maxdepth: 1 + + mds diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst new file mode 100644 index 000000000000..1096738d50f2 --- /dev/null +++ b/Documentation/x86/mds.rst @@ -0,0 +1,99 @@ +Microarchitectural Data Sampling (MDS) mitigation +================================================= + +.. _mds: + +Overview +-------- + +Microarchitectural Data Sampling (MDS) is a family of side channel attacks +on internal buffers in Intel CPUs. The variants are: + + - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) + - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) + - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) + +MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a +dependent load (store-to-load forwarding) as an optimization. The forward +can also happen to a faulting or assisting load operation for a different +memory address, which can be exploited under certain conditions. Store +buffers are partitioned between Hyper-Threads so cross thread forwarding is +not possible. But if a thread enters or exits a sleep state the store +buffer is repartitioned which can expose data from one thread to the other. + +MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage +L1 miss situations and to hold data which is returned or sent in response +to a memory or I/O operation. Fill buffers can forward data to a load +operation and also write data to the cache. When the fill buffer is +deallocated it can retain the stale data of the preceding operations which +can then be forwarded to a faulting or assisting load operation, which can +be exploited under certain conditions. Fill buffers are shared between +Hyper-Threads so cross thread leakage is possible. + +MLPDS leaks Load Port Data. Load ports are used to perform load operations +from memory or I/O. The received data is then forwarded to the register +file or a subsequent operation. In some implementations the Load Port can +contain stale data from a previous operation which can be forwarded to +faulting or assisting loads under certain conditions, which again can be +exploited eventually. Load ports are shared between Hyper-Threads so cross +thread leakage is possible. + + +Exposure assumptions +-------------------- + +It is assumed that attack code resides in user space or in a guest with one +exception. The rationale behind this assumption is that the code construct +needed for exploiting MDS requires: + + - to control the load to trigger a fault or assist + + - to have a disclosure gadget which exposes the speculatively accessed + data for consumption through a side channel. + + - to control the pointer through which the disclosure gadget exposes the + data + +The existence of such a construct in the kernel cannot be excluded with +100% certainty, but the complexity involved makes it extremly unlikely. + +There is one exception, which is untrusted BPF. The functionality of +untrusted BPF is limited, but it needs to be thoroughly investigated +whether it can be used to create such a construct. + + +Mitigation strategy +------------------- + +All variants have the same mitigation strategy at least for the single CPU +thread case (SMT off): Force the CPU to clear the affected buffers. + +This is achieved by using the otherwise unused and obsolete VERW +instruction in combination with a microcode update. The microcode clears +the affected CPU buffers when the VERW instruction is executed. + +For virtualization there are two ways to achieve CPU buffer +clearing. Either the modified VERW instruction or via the L1D Flush +command. The latter is issued when L1TF mitigation is enabled so the extra +VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to +be issued. + +If the VERW instruction with the supplied segment selector argument is +executed on a CPU without the microcode update there is no side effect +other than a small number of pointlessly wasted CPU cycles. + +This does not protect against cross Hyper-Thread attacks except for MSBDS +which is only exploitable cross Hyper-thread when one of the Hyper-Threads +enters a C-state. + +The kernel provides a function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + +The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state +(idle) transitions. + +According to current knowledge additional mitigations inside the kernel +itself are not required because the necessary gadgets to expose the leaked +data cannot be controlled in a way which allows exploitation from malicious +user space or VM guests. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 14bf299b369f..4d11e89351f1 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -308,6 +308,31 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +#include + +/** + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static inline void mds_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. + */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + #endif /* __ASSEMBLY__ */ /* From 20041a0ebf3f9d99db3a8ffd81a679b925cb9fe4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 23:42:51 +0100 Subject: [PATCH 56/77] x86/speculation/mds: Clear CPU buffers on exit to user commit 04dcbdb8057827b043b3c71aa397c4c63e67d086 upstream. Add a static key which controls the invocation of the CPU buffer clear mechanism on exit to user space and add the call into prepare_exit_to_usermode() and do_nmi() right before actually returning. Add documentation which kernel to user space transition this covers and explain why some corner cases are not mitigated. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 52 ++++++++++++++++++++++++++++ arch/x86/entry/common.c | 3 ++ arch/x86/include/asm/nospec-branch.h | 13 +++++++ arch/x86/kernel/cpu/bugs.c | 3 ++ arch/x86/kernel/nmi.c | 4 +++ arch/x86/kernel/traps.c | 8 +++++ 6 files changed, 83 insertions(+) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 1096738d50f2..54d935bf283b 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -97,3 +97,55 @@ According to current knowledge additional mitigations inside the kernel itself are not required because the necessary gadgets to expose the leaked data cannot be controlled in a way which allows exploitation from malicious user space or VM guests. + +Mitigation points +----------------- + +1. Return to user space +^^^^^^^^^^^^^^^^^^^^^^^ + + When transitioning from kernel to user space the CPU buffers are flushed + on affected CPUs when the mitigation is not disabled on the kernel + command line. The migitation is enabled through the static key + mds_user_clear. + + The mitigation is invoked in prepare_exit_to_usermode() which covers + most of the kernel to user space transitions. There are a few exceptions + which are not invoking prepare_exit_to_usermode() on return to user + space. These exceptions use the paranoid exit code. + + - Non Maskable Interrupt (NMI): + + Access to sensible data like keys, credentials in the NMI context is + mostly theoretical: The CPU can do prefetching or execute a + misspeculated code path and thereby fetching data which might end up + leaking through a buffer. + + But for mounting other attacks the kernel stack address of the task is + already valuable information. So in full mitigation mode, the NMI is + mitigated on the return from do_nmi() to provide almost complete + coverage. + + - Double fault (#DF): + + A double fault is usually fatal, but the ESPFIX workaround, which can + be triggered from user space through modify_ldt(2) is a recoverable + double fault. #DF uses the paranoid exit path, so explicit mitigation + in the double fault handler is required. + + - Machine Check Exception (#MC): + + Another corner case is a #MC which hits between the CPU buffer clear + invocation and the actual return to user. As this still is in kernel + space it takes the paranoid exit path which does not clear the CPU + buffers. So the #MC handler repopulates the buffers to some + extent. Machine checks are not reliably controllable and the window is + extremly small so mitigation would just tick a checkbox that this + theoretical corner case is covered. To keep the amount of special + cases small, ignore #MC. + + - Debug Exception (#DB): + + This takes the paranoid exit path only when the INT1 breakpoint is in + kernel space. #DB on a user space address takes the regular exit path, + so no extra mitigation required. diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index b0cd306dc527..8841d016b4a4 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -28,6 +28,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -206,6 +207,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) #endif user_enter_irqoff(); + + mds_user_clear_cpu_buffers(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4d11e89351f1..421015ef1703 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -308,6 +308,8 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +DECLARE_STATIC_KEY_FALSE(mds_user_clear); + #include /** @@ -333,6 +335,17 @@ static inline void mds_clear_cpu_buffers(void) asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } +/** + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_user_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_user_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a7e9a93e387a..4dc04dc913c3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -59,6 +59,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +/* Control MDS CPU buffer clear before returning to user space */ +DEFINE_STATIC_KEY_FALSE(mds_user_clear); + void __init check_bugs(void) { identify_boot_cpu(); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index bfe4d6c96fbd..6b7b35d80264 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -32,6 +32,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -544,6 +545,9 @@ nmi_restart: write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; + + if (user_mode(regs)) + mds_user_clear_cpu_buffers(); } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5bbfa2f63b8c..ef225fa8e928 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -340,6 +341,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->ip = (unsigned long)general_protection; regs->sp = (unsigned long)&normal_regs->orig_ax; + /* + * This situation can be triggered by userspace via + * modify_ldt(2) and the return does not take the regular + * user space exit, so a CPU buffer clear is required when + * MDS mitigation is enabled. + */ + mds_user_clear_cpu_buffers(); return; } #endif From 3a8e7f6993c8240f6cc8564ff06702512b3b18bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Feb 2019 12:48:14 +0100 Subject: [PATCH 57/77] x86/kvm/vmx: Add MDS protection when L1D Flush is not active commit 650b68a0622f933444a6d66936abb3103029413b upstream. CPUs which are affected by L1TF and MDS mitigate MDS with the L1D Flush on VMENTER when updated microcode is installed. If a CPU is not affected by L1TF or if the L1D Flush is not in use, then MDS mitigation needs to be invoked explicitly. For these cases, follow the host mitigation state and invoke the MDS mitigation before VMENTER. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 1 + arch/x86/kvm/vmx.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4dc04dc913c3..8412998b3440 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -61,6 +61,7 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); /* Control MDS CPU buffer clear before returning to user space */ DEFINE_STATIC_KEY_FALSE(mds_user_clear); +EXPORT_SYMBOL_GPL(mds_user_clear); void __init check_bugs(void) { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 75466d9417b8..5e44c255de6b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9206,8 +9206,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->__launched = vmx->loaded_vmcs->launched; + /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); + else if (static_branch_unlikely(&mds_user_clear)) + mds_clear_cpu_buffers(); asm( /* Store host registers */ From 2394f5912c223b767be0c4f8365570335110a8c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 23:04:01 +0100 Subject: [PATCH 58/77] x86/speculation/mds: Conditionally clear CPU buffers on idle entry commit 07f07f55a29cb705e221eda7894dd67ab81ef343 upstream. Add a static key which controls the invocation of the CPU buffer clear mechanism on idle entry. This is independent of other MDS mitigations because the idle entry invocation to mitigate the potential leakage due to store buffer repartitioning is only necessary on SMT systems. Add the actual invocations to the different halt/mwait variants which covers all usage sites. mwaitx is not patched as it's not available on Intel CPUs. The buffer clear is only invoked before entering the C-State to prevent that stale data from the idling CPU is spilled to the Hyper-Thread sibling after the Store buffer got repartitioned and all entries are available to the non idle sibling. When coming out of idle the store buffer is partitioned again so each sibling has half of it available. Now CPU which returned from idle could be speculatively exposed to contents of the sibling, but the buffers are flushed either on exit to user space or on VMENTER. When later on conditional buffer clearing is implemented on top of this, then there is no action required either because before returning to user space the context switch will set the condition flag which causes a flush on the return to user path. Note, that the buffer clearing on idle is only sensible on CPUs which are solely affected by MSBDS and not any other variant of MDS because the other MDS variants cannot be mitigated when SMT is enabled, so the buffer clearing on idle would be a window dressing exercise. This intentionally does not handle the case in the acpi/processor_idle driver which uses the legacy IO port interface for C-State transitions for two reasons: - The acpi/processor_idle driver was replaced by the intel_idle driver almost a decade ago. Anything Nehalem upwards supports it and defaults to that new driver. - The legacy IO port interface is likely to be used on older and therefore unaffected CPUs or on systems which do not receive microcode updates anymore, so there is no point in adding that. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Reviewed-by: Frederic Weisbecker Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 42 ++++++++++++++++++++++++++++ arch/x86/include/asm/irqflags.h | 4 +++ arch/x86/include/asm/mwait.h | 7 +++++ arch/x86/include/asm/nospec-branch.h | 12 ++++++++ arch/x86/kernel/cpu/bugs.c | 3 ++ 5 files changed, 68 insertions(+) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 54d935bf283b..87ce8ac9f36e 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -149,3 +149,45 @@ Mitigation points This takes the paranoid exit path only when the INT1 breakpoint is in kernel space. #DB on a user space address takes the regular exit path, so no extra mitigation required. + + +2. C-State transition +^^^^^^^^^^^^^^^^^^^^^ + + When a CPU goes idle and enters a C-State the CPU buffers need to be + cleared on affected CPUs when SMT is active. This addresses the + repartitioning of the store buffer when one of the Hyper-Threads enters + a C-State. + + When SMT is inactive, i.e. either the CPU does not support it or all + sibling threads are offline CPU buffer clearing is not required. + + The idle clearing is enabled on CPUs which are only affected by MSBDS + and not by any other MDS variant. The other MDS variants cannot be + protected against cross Hyper-Thread attacks because the Fill Buffer and + the Load Ports are shared. So on CPUs affected by other variants, the + idle clearing would be a window dressing exercise and is therefore not + activated. + + The invocation is controlled by the static key mds_idle_clear which is + switched depending on the chosen mitigation mode and the SMT state of + the system. + + The buffer clear is only invoked before entering the C-State to prevent + that stale data from the idling CPU from spilling to the Hyper-Thread + sibling after the store buffer got repartitioned and all entries are + available to the non idle sibling. + + When coming out of idle the store buffer is partitioned again so each + sibling has half of it available. The back from idle CPU could be then + speculatively exposed to contents of the sibling. The buffers are + flushed either on exit to user space or on VMENTER so malicious code + in user space or the guest cannot speculatively access them. + + The mitigation is hooked into all variants of halt()/mwait(), but does + not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver + has been superseded by the intel_idle driver around 2010 and is + preferred on all affected CPUs which are expected to gain the MD_CLEAR + functionality in microcode. Aside of that the IO-Port mechanism is a + legacy interface which is only used on older systems which are either + not affected or do not receive microcode updates anymore. diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 508a062e6cf1..0c8f4281b151 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -5,6 +5,8 @@ #ifndef __ASSEMBLY__ +#include + /* Provide __cpuidle; we can't safely include */ #define __cpuidle __attribute__((__section__(".cpuidle.text"))) @@ -53,11 +55,13 @@ static inline void native_irq_enable(void) static inline __cpuidle void native_safe_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index f37f2d8a2989..0b40cc442bda 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -4,6 +4,7 @@ #include #include +#include #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf @@ -38,6 +39,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx, static inline void __mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); @@ -72,6 +75,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __mwaitx(unsigned long eax, unsigned long ebx, unsigned long ecx) { + /* No MDS buffer clear as this is AMD/HYGON only */ + /* "mwaitx %eax, %ebx, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xfb;" :: "a" (eax), "b" (ebx), "c" (ecx)); @@ -79,6 +84,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + trace_hardirqs_on(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 421015ef1703..031a58e84e5b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -309,6 +309,7 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(mds_user_clear); +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); #include @@ -346,6 +347,17 @@ static inline void mds_user_clear_cpu_buffers(void) mds_clear_cpu_buffers(); } +/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_idle_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8412998b3440..499677befcc1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -62,6 +62,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); /* Control MDS CPU buffer clear before returning to user space */ DEFINE_STATIC_KEY_FALSE(mds_user_clear); EXPORT_SYMBOL_GPL(mds_user_clear); +/* Control MDS CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(mds_idle_clear); +EXPORT_SYMBOL_GPL(mds_idle_clear); void __init check_bugs(void) { From 4e722ae3141fc6aebadc722b3b10720e2ffd866f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 22:04:08 +0100 Subject: [PATCH 59/77] x86/speculation/mds: Add mitigation control for MDS commit bc1241700acd82ec69fde98c5763ce51086269f8 upstream. Now that the mitigations are in place, add a command line parameter to control the mitigation, a mitigation selector function and a SMT update mechanism. This is the minimal straight forward initial implementation which just provides an always on/off mode. The command line parameter is: mds=[full|off] This is consistent with the existing mitigations for other speculative hardware vulnerabilities. The idle invocation is dynamically updated according to the SMT state of the system similar to the dynamic update of the STIBP mitigation. The idle mitigation is limited to CPUs which are only affected by MSBDS and not any other variant, because the other variants cannot be mitigated on SMT enabled systems. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters [bwh: Backported to 4.9: adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 22 +++++++++ arch/x86/include/asm/processor.h | 5 +++ arch/x86/kernel/cpu/bugs.c | 70 +++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9e5560f52e9b..3dcc0af5e441 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2325,6 +2325,28 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Format: , Specifies range of consoles to be captured by the MDA. + mds= [X86,INTEL] + Control mitigation for the Micro-architectural Data + Sampling (MDS) vulnerability. + + Certain CPUs are vulnerable to an exploit against CPU + internal buffers which can forward information to a + disclosure gadget under certain conditions. + + In vulnerable processors, the speculatively + forwarded data can be used in a cache side channel + attack, to access data to which the attacker does + not have direct access. + + This parameter controls the MDS mitigation. The + options are: + + full - Enable MDS mitigation on vulnerable CPUs + off - Unconditionally disable MDS mitigation + + Not specifying this option is equivalent to + mds=full. + mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory Amount of memory to be used when the kernel is not able to see the whole system memory or for test. diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ee8c6290c421..e8718987c8f5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -874,4 +874,9 @@ enum l1tf_mitigations { extern enum l1tf_mitigations l1tf_mitigation; +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_FULL, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 499677befcc1..ec91c156b820 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -33,6 +33,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); +static void __init mds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -104,6 +105,8 @@ void __init check_bugs(void) l1tf_select_mitigation(); + mds_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -209,6 +212,50 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "MDS: " fmt + +/* Default mitigation for L1TF-affected CPUs */ +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; + +static const char * const mds_strings[] = { + [MDS_MITIGATION_OFF] = "Vulnerable", + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers" +}; + +static void __init mds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_OFF; + return; + } + + if (mds_mitigation == MDS_MITIGATION_FULL) { + if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) + static_branch_enable(&mds_user_clear); + else + mds_mitigation = MDS_MITIGATION_OFF; + } + pr_info("%s\n", mds_strings[mds_mitigation]); +} + +static int __init mds_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + mds_mitigation = MDS_MITIGATION_OFF; + else if (!strcmp(str, "full")) + mds_mitigation = MDS_MITIGATION_FULL; + + return 0; +} +early_param("mds", mds_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -610,6 +657,26 @@ static void update_indir_branch_cond(void) static_branch_disable(&switch_to_cond_stibp); } +/* Update the static key controlling the MDS CPU buffer clear in idle */ +static void update_mds_branch_idle(void) +{ + /* + * Enable the idle clearing if SMT is active on CPUs which are + * affected only by MSBDS and not any other MDS variant. + * + * The other variants cannot be mitigated when SMT is enabled, so + * clearing the buffers on idle just to prevent the Store Buffer + * repartitioning leak would be a window dressing exercise. + */ + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) + return; + + if (sched_smt_active()) + static_branch_enable(&mds_idle_clear); + else + static_branch_disable(&mds_idle_clear); +} + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -630,6 +697,9 @@ void arch_smt_update(void) break; } + if (mds_mitigation == MDS_MITIGATION_FULL) + update_mds_branch_idle(); + mutex_unlock(&spec_ctrl_mutex); } From ba08d562b066f044e2985ece32b7890f556ee5ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Feb 2019 22:51:43 +0100 Subject: [PATCH 60/77] x86/speculation/mds: Add sysfs reporting for MDS commit 8a4b06d391b0a42a373808979b5028f5c84d9c6a upstream. Add the sysfs reporting file for MDS. It exposes the vulnerability and mitigation state similar to the existing files for the other speculative hardware vulnerabilities. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters [bwh: Backported to 4.9: test x86_hyper instead of using hypervisor_is_type()] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 1 + arch/x86/kernel/cpu/bugs.c | 28 +++++++++++++++++++ drivers/base/cpu.c | 8 ++++++ include/linux/cpu.h | 2 ++ 4 files changed, 39 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 069e8d52c991..b4ba4f1ad610 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -357,6 +357,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/l1tf + /sys/devices/system/cpu/vulnerabilities/mds Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ec91c156b820..fe374b25f3a6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1161,6 +1162,24 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static ssize_t mds_show_state(char *buf) +{ +#ifdef CONFIG_HYPERVISOR_GUEST + if (x86_hyper) { + return sprintf(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); + } +#endif + + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "mitigated" : "disabled"); + } + + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1222,6 +1241,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) return l1tf_show_state(buf); break; + + case X86_BUG_MDS: + return mds_show_state(buf); + default: break; } @@ -1253,4 +1276,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b { return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); } + +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); +} #endif diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index f1f4ce7ddb47..3b123735a1c4 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -531,11 +531,18 @@ ssize_t __weak cpu_show_l1tf(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_mds(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); +static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -543,6 +550,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spectre_v2.attr, &dev_attr_spec_store_bypass.attr, &dev_attr_l1tf.attr, + &dev_attr_mds.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index ae5ac89324df..1f88e86193ae 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -54,6 +54,8 @@ extern ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_mds(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From 81ea109a9b1265e715c1ce5b45f6d0ed99b9f482 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Feb 2019 09:40:40 +0100 Subject: [PATCH 61/77] x86/speculation/mds: Add mitigation mode VMWERV commit 22dd8365088b6403630b82423cf906491859b65e upstream. In virtualized environments it can happen that the host has the microcode update which utilizes the VERW instruction to clear CPU buffers, but the hypervisor is not yet updated to expose the X86_FEATURE_MD_CLEAR CPUID bit to guests. Introduce an internal mitigation mode VMWERV which enables the invocation of the CPU buffer clearing even if X86_FEATURE_MD_CLEAR is not set. If the system has no updated microcode this results in a pointless execution of the VERW instruction wasting a few CPU cycles. If the microcode is updated, but not exposed to a guest then the CPU buffers will be cleared. That said: Virtual Machines Will Eventually Receive Vaccine Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Jon Masters Tested-by: Jon Masters Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 27 +++++++++++++++++++++++++++ arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++------ 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 87ce8ac9f36e..3d6f943f1afb 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -93,11 +93,38 @@ The kernel provides a function to invoke the buffer clearing: The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. +As a special quirk to address virtualization scenarios where the host has +the microcode updated, but the hypervisor does not (yet) expose the +MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the +hope that it might actually clear the buffers. The state is reflected +accordingly. + According to current knowledge additional mitigations inside the kernel itself are not required because the necessary gadgets to expose the leaked data cannot be controlled in a way which allows exploitation from malicious user space or VM guests. +Kernel internal mitigation modes +-------------------------------- + + ======= ============================================================ + off Mitigation is disabled. Either the CPU is not affected or + mds=off is supplied on the kernel command line + + full Mitigation is eanbled. CPU is affected and MD_CLEAR is + advertised in CPUID. + + vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not + advertised in CPUID. That is mainly for virtualization + scenarios where the host has the updated microcode but the + hypervisor does not expose MD_CLEAR in CPUID. It's a best + effort approach without guarantee. + ======= ============================================================ + +If the CPU is affected and mds=off is not supplied on the kernel command +line then the kernel selects the appropriate mitigation mode depending on +the availability of the MD_CLEAR CPUID bit. + Mitigation points ----------------- diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e8718987c8f5..155e49fc7010 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -877,6 +877,7 @@ extern enum l1tf_mitigations l1tf_mitigation; enum mds_mitigations { MDS_MITIGATION_OFF, MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, }; #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index fe374b25f3a6..1d6b7e9f0411 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -221,7 +221,8 @@ static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL static const char * const mds_strings[] = { [MDS_MITIGATION_OFF] = "Vulnerable", - [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers" + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode", }; static void __init mds_select_mitigation(void) @@ -232,10 +233,9 @@ static void __init mds_select_mitigation(void) } if (mds_mitigation == MDS_MITIGATION_FULL) { - if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) - static_branch_enable(&mds_user_clear); - else - mds_mitigation = MDS_MITIGATION_OFF; + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) + mds_mitigation = MDS_MITIGATION_VMWERV; + static_branch_enable(&mds_user_clear); } pr_info("%s\n", mds_strings[mds_mitigation]); } @@ -698,8 +698,14 @@ void arch_smt_update(void) break; } - if (mds_mitigation == MDS_MITIGATION_FULL) + switch (mds_mitigation) { + case MDS_MITIGATION_FULL: + case MDS_MITIGATION_VMWERV: update_mds_branch_idle(); + break; + case MDS_MITIGATION_OFF: + break; + } mutex_unlock(&spec_ctrl_mutex); } From cb106035bd0f0f43c78a29a56c270e1df0e75c24 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Feb 2019 11:10:49 +0100 Subject: [PATCH 62/77] Documentation: Move L1TF to separate directory commit 65fd4cb65b2dad97feb8330b6690445910b56d6a upstream. Move L!TF to a separate directory so the MDS stuff can be added at the side. Otherwise the all hardware vulnerabilites have their own top level entry. Should have done that right away. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jon Masters [bwh: Backported to 4.9: adjust filenames, context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-system-cpu | 2 +- Documentation/hw-vuln/index.rst | 12 ++++++++++++ Documentation/{ => hw-vuln}/l1tf.rst | 0 Documentation/index.rst | 8 +++++++- Documentation/kernel-parameters.txt | 2 +- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kvm/vmx.c | 4 ++-- 7 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 Documentation/hw-vuln/index.rst rename Documentation/{ => hw-vuln}/l1tf.rst (100%) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index b4ba4f1ad610..1f5d22bbf766 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -371,7 +371,7 @@ Description: Information about CPU vulnerabilities "Mitigation: $M" CPU is affected and mitigation $M is in effect Details about the l1tf file can be found in - Documentation/admin-guide/l1tf.rst + Documentation/hw-vuln/l1tf.rst What: /sys/devices/system/cpu/smt /sys/devices/system/cpu/smt/active diff --git a/Documentation/hw-vuln/index.rst b/Documentation/hw-vuln/index.rst new file mode 100644 index 000000000000..8ce2009f1981 --- /dev/null +++ b/Documentation/hw-vuln/index.rst @@ -0,0 +1,12 @@ +======================== +Hardware vulnerabilities +======================== + +This section describes CPU vulnerabilities and provides an overview of the +possible mitigations along with guidance for selecting mitigations if they +are configurable at compile, boot or run time. + +.. toctree:: + :maxdepth: 1 + + l1tf diff --git a/Documentation/l1tf.rst b/Documentation/hw-vuln/l1tf.rst similarity index 100% rename from Documentation/l1tf.rst rename to Documentation/hw-vuln/l1tf.rst diff --git a/Documentation/index.rst b/Documentation/index.rst index 18b2484d19ef..f95c58dbbbc3 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -12,7 +12,6 @@ Contents: :maxdepth: 2 kernel-documentation - l1tf development-process/index dev-tools/tools driver-api/index @@ -20,6 +19,13 @@ Contents: gpu/index 80211/index +This section describes CPU vulnerabilities and their mitigations. + +.. toctree:: + :maxdepth: 1 + + hw-vuln/index + Architecture-specific documentation ----------------------------------- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3dcc0af5e441..12c33e542163 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2082,7 +2082,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Default is 'flush'. - For details see: Documentation/admin-guide/l1tf.rst + For details see: Documentation/hw-vuln/l1tf.rst l2cr= [PPC] diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1d6b7e9f0411..e0c77a46c55a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1097,7 +1097,7 @@ static void __init l1tf_select_mitigation(void) pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", half_pa); pr_info("However, doing so will make a part of your RAM unusable.\n"); - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n"); return; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5e44c255de6b..8feb4f7e2e59 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9569,8 +9569,8 @@ free_vcpu: return ERR_PTR(err); } -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" static int vmx_vm_init(struct kvm *kvm) { From 3880bc168f2188b7e039a9b16a13dbff7b80d462 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Feb 2019 00:02:31 +0100 Subject: [PATCH 63/77] Documentation: Add MDS vulnerability documentation commit 5999bbe7a6ea3c62029532ec84dc06003a1fa258 upstream. Add the initial MDS vulnerability documentation. Signed-off-by: Thomas Gleixner Reviewed-by: Jon Masters [bwh: Backported to 4.9: adjust filenames] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-system-cpu | 3 +- Documentation/hw-vuln/index.rst | 1 + Documentation/hw-vuln/l1tf.rst | 1 + Documentation/hw-vuln/mds.rst | 307 ++++++++++++++++++ Documentation/kernel-parameters.txt | 2 + 5 files changed, 312 insertions(+), 2 deletions(-) create mode 100644 Documentation/hw-vuln/mds.rst diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 1f5d22bbf766..cadb7a9a5218 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -370,8 +370,7 @@ Description: Information about CPU vulnerabilities "Vulnerable" CPU is affected and no mitigation in effect "Mitigation: $M" CPU is affected and mitigation $M is in effect - Details about the l1tf file can be found in - Documentation/hw-vuln/l1tf.rst + See also: Documentation/hw-vuln/index.rst What: /sys/devices/system/cpu/smt /sys/devices/system/cpu/smt/active diff --git a/Documentation/hw-vuln/index.rst b/Documentation/hw-vuln/index.rst index 8ce2009f1981..ffc064c1ec68 100644 --- a/Documentation/hw-vuln/index.rst +++ b/Documentation/hw-vuln/index.rst @@ -10,3 +10,4 @@ are configurable at compile, boot or run time. :maxdepth: 1 l1tf + mds diff --git a/Documentation/hw-vuln/l1tf.rst b/Documentation/hw-vuln/l1tf.rst index 9af977384168..31653a9f0e1b 100644 --- a/Documentation/hw-vuln/l1tf.rst +++ b/Documentation/hw-vuln/l1tf.rst @@ -445,6 +445,7 @@ The default is 'cond'. If 'l1tf=full,force' is given on the kernel command line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush module parameter is ignored and writes to the sysfs file are rejected. +.. _mitigation_selection: Mitigation selection guide -------------------------- diff --git a/Documentation/hw-vuln/mds.rst b/Documentation/hw-vuln/mds.rst new file mode 100644 index 000000000000..ff6bfdb97eee --- /dev/null +++ b/Documentation/hw-vuln/mds.rst @@ -0,0 +1,307 @@ +MDS - Microarchitectural Data Sampling +====================================== + +Microarchitectural Data Sampling is a hardware vulnerability which allows +unprivileged speculative access to data which is available in various CPU +internal buffers. + +Affected processors +------------------- + +This vulnerability affects a wide range of Intel processors. The +vulnerability is not present on: + + - Processors from AMD, Centaur and other non Intel vendors + + - Older processor models, where the CPU family is < 6 + + - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus) + + - Intel processors which have the ARCH_CAP_MDS_NO bit set in the + IA32_ARCH_CAPABILITIES MSR. + +Whether a processor is affected or not can be read out from the MDS +vulnerability file in sysfs. See :ref:`mds_sys_info`. + +Not all processors are affected by all variants of MDS, but the mitigation +is identical for all of them so the kernel treats them as a single +vulnerability. + +Related CVEs +------------ + +The following CVE entries are related to the MDS vulnerability: + + ============== ===== ============================================== + CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling + CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling + CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling + ============== ===== ============================================== + +Problem +------- + +When performing store, load, L1 refill operations, processors write data +into temporary microarchitectural structures (buffers). The data in the +buffer can be forwarded to load operations as an optimization. + +Under certain conditions, usually a fault/assist caused by a load +operation, data unrelated to the load memory address can be speculatively +forwarded from the buffers. Because the load operation causes a fault or +assist and its result will be discarded, the forwarded data will not cause +incorrect program execution or state changes. But a malicious operation +may be able to forward this speculative data to a disclosure gadget which +allows in turn to infer the value via a cache side channel attack. + +Because the buffers are potentially shared between Hyper-Threads cross +Hyper-Thread attacks are possible. + +Deeper technical information is available in the MDS specific x86 +architecture section: :ref:`Documentation/x86/mds.rst `. + + +Attack scenarios +---------------- + +Attacks against the MDS vulnerabilities can be mounted from malicious non +priviledged user space applications running on hosts or guest. Malicious +guest OSes can obviously mount attacks as well. + +Contrary to other speculation based vulnerabilities the MDS vulnerability +does not allow the attacker to control the memory target address. As a +consequence the attacks are purely sampling based, but as demonstrated with +the TLBleed attack samples can be postprocessed successfully. + +Web-Browsers +^^^^^^^^^^^^ + + It's unclear whether attacks through Web-Browsers are possible at + all. The exploitation through Java-Script is considered very unlikely, + but other widely used web technologies like Webassembly could possibly be + abused. + + +.. _mds_sys_info: + +MDS system information +----------------------- + +The Linux kernel provides a sysfs interface to enumerate the current MDS +status of the system: whether the system is vulnerable, and which +mitigations are active. The relevant sysfs file is: + +/sys/devices/system/cpu/vulnerabilities/mds + +The possible values in this file are: + + ========================================= ================================= + 'Not affected' The processor is not vulnerable + + 'Vulnerable' The processor is vulnerable, + but no mitigation enabled + + 'Vulnerable: Clear CPU buffers attempted' The processor is vulnerable but + microcode is not updated. + The mitigation is enabled on a + best effort basis. + See :ref:`vmwerv` + + 'Mitigation: CPU buffer clear' The processor is vulnerable and the + CPU buffer clearing mitigation is + enabled. + ========================================= ================================= + +If the processor is vulnerable then the following information is appended +to the above information: + + ======================== ============================================ + 'SMT vulnerable' SMT is enabled + 'SMT mitigated' SMT is enabled and mitigated + 'SMT disabled' SMT is disabled + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown + ======================== ============================================ + +.. _vmwerv: + +Best effort mitigation mode +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + If the processor is vulnerable, but the availability of the microcode based + mitigation mechanism is not advertised via CPUID the kernel selects a best + effort mitigation mode. This mode invokes the mitigation instructions + without a guarantee that they clear the CPU buffers. + + This is done to address virtualization scenarios where the host has the + microcode update applied, but the hypervisor is not yet updated to expose + the CPUID to the guest. If the host has updated microcode the protection + takes effect otherwise a few cpu cycles are wasted pointlessly. + + The state in the mds sysfs file reflects this situation accordingly. + + +Mitigation mechanism +------------------------- + +The kernel detects the affected CPUs and the presence of the microcode +which is required. + +If a CPU is affected and the microcode is available, then the kernel +enables the mitigation by default. The mitigation can be controlled at boot +time via a kernel command line option. See +:ref:`mds_mitigation_control_command_line`. + +.. _cpu_buffer_clear: + +CPU buffer clearing +^^^^^^^^^^^^^^^^^^^ + + The mitigation for MDS clears the affected CPU buffers on return to user + space and when entering a guest. + + If SMT is enabled it also clears the buffers on idle entry when the CPU + is only affected by MSBDS and not any other MDS variant, because the + other variants cannot be protected against cross Hyper-Thread attacks. + + For CPUs which are only affected by MSBDS the user space, guest and idle + transition mitigations are sufficient and SMT is not affected. + +.. _virt_mechanism: + +Virtualization mitigation +^^^^^^^^^^^^^^^^^^^^^^^^^ + + The protection for host to guest transition depends on the L1TF + vulnerability of the CPU: + + - CPU is affected by L1TF: + + If the L1D flush mitigation is enabled and up to date microcode is + available, the L1D flush mitigation is automatically protecting the + guest transition. + + If the L1D flush mitigation is disabled then the MDS mitigation is + invoked explicit when the host MDS mitigation is enabled. + + For details on L1TF and virtualization see: + :ref:`Documentation/hw-vuln//l1tf.rst `. + + - CPU is not affected by L1TF: + + CPU buffers are flushed before entering the guest when the host MDS + mitigation is enabled. + + The resulting MDS protection matrix for the host to guest transition: + + ============ ===== ============= ============ ================= + L1TF MDS VMX-L1FLUSH Host MDS MDS-State + + Don't care No Don't care N/A Not affected + + Yes Yes Disabled Off Vulnerable + + Yes Yes Disabled Full Mitigated + + Yes Yes Enabled Don't care Mitigated + + No Yes N/A Off Vulnerable + + No Yes N/A Full Mitigated + ============ ===== ============= ============ ================= + + This only covers the host to guest transition, i.e. prevents leakage from + host to guest, but does not protect the guest internally. Guests need to + have their own protections. + +.. _xeon_phi: + +XEON PHI specific considerations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The XEON PHI processor family is affected by MSBDS which can be exploited + cross Hyper-Threads when entering idle states. Some XEON PHI variants allow + to use MWAIT in user space (Ring 3) which opens an potential attack vector + for malicious user space. The exposure can be disabled on the kernel + command line with the 'ring3mwait=disable' command line option. + + XEON PHI is not affected by the other MDS variants and MSBDS is mitigated + before the CPU enters a idle state. As XEON PHI is not affected by L1TF + either disabling SMT is not required for full protection. + +.. _mds_smt_control: + +SMT control +^^^^^^^^^^^ + + All MDS variants except MSBDS can be attacked cross Hyper-Threads. That + means on CPUs which are affected by MFBDS or MLPDS it is necessary to + disable SMT for full protection. These are most of the affected CPUs; the + exception is XEON PHI, see :ref:`xeon_phi`. + + Disabling SMT can have a significant performance impact, but the impact + depends on the type of workloads. + + See the relevant chapter in the L1TF mitigation documentation for details: + :ref:`Documentation/hw-vuln/l1tf.rst `. + + +.. _mds_mitigation_control_command_line: + +Mitigation control on the kernel command line +--------------------------------------------- + +The kernel command line allows to control the MDS mitigations at boot +time with the option "mds=". The valid arguments for this option are: + + ============ ============================================================= + full If the CPU is vulnerable, enable all available mitigations + for the MDS vulnerability, CPU buffer clearing on exit to + userspace and when entering a VM. Idle transitions are + protected as well if SMT is enabled. + + It does not automatically disable SMT. + + off Disables MDS mitigations completely. + + ============ ============================================================= + +Not specifying this option is equivalent to "mds=full". + + +Mitigation selection guide +-------------------------- + +1. Trusted userspace +^^^^^^^^^^^^^^^^^^^^ + + If all userspace applications are from a trusted source and do not + execute untrusted code which is supplied externally, then the mitigation + can be disabled. + + +2. Virtualization with trusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The same considerations as above versus trusted user space apply. + +3. Virtualization with untrusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The protection depends on the state of the L1TF mitigations. + See :ref:`virt_mechanism`. + + If the MDS mitigation is enabled and SMT is disabled, guest to host and + guest to guest attacks are prevented. + +.. _mds_default_mitigations: + +Default mitigations +------------------- + + The kernel default mitigations for vulnerable processors are: + + - Enable CPU buffer clearing + + The kernel does not by default enforce the disabling of SMT, which leaves + SMT systems vulnerable when running untrusted code. The same rationale as + for L1TF applies. + See :ref:`Documentation/hw-vuln//l1tf.rst `. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 12c33e542163..7164790b82c0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2347,6 +2347,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Not specifying this option is equivalent to mds=full. + For details see: Documentation/hw-vuln/mds.rst + mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory Amount of memory to be used when the kernel is not able to see the whole system memory or for test. From f02eee68e2fc2ded5d620684599826d10392d055 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Apr 2019 09:59:33 -0500 Subject: [PATCH 64/77] x86/speculation/mds: Add mds=full,nosmt cmdline option commit d71eb0ce109a124b0fa714832823b9452f2762cf upstream. Add the mds=full,nosmt cmdline option. This is like mds=full, but with SMT disabled if the CPU is vulnerable. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Acked-by: Jiri Kosina [bwh: Backported to 4.9: adjust filenames] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/hw-vuln/mds.rst | 3 +++ Documentation/kernel-parameters.txt | 6 ++++-- arch/x86/kernel/cpu/bugs.c | 10 ++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Documentation/hw-vuln/mds.rst b/Documentation/hw-vuln/mds.rst index ff6bfdb97eee..aec9e49256b5 100644 --- a/Documentation/hw-vuln/mds.rst +++ b/Documentation/hw-vuln/mds.rst @@ -260,6 +260,9 @@ time with the option "mds=". The valid arguments for this option are: It does not automatically disable SMT. + full,nosmt The same as mds=full, with SMT disabled on vulnerable + CPUs. This is the complete mitigation. + off Disables MDS mitigations completely. ============ ============================================================= diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7164790b82c0..4c134700c120 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2341,8 +2341,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. This parameter controls the MDS mitigation. The options are: - full - Enable MDS mitigation on vulnerable CPUs - off - Unconditionally disable MDS mitigation + full - Enable MDS mitigation on vulnerable CPUs + full,nosmt - Enable MDS mitigation and disable + SMT on vulnerable CPUs + off - Unconditionally disable MDS mitigation Not specifying this option is equivalent to mds=full. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e0c77a46c55a..a8bef0a24390 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -218,6 +218,7 @@ static void x86_amd_ssb_disable(void) /* Default mitigation for L1TF-affected CPUs */ static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; +static bool mds_nosmt __ro_after_init = false; static const char * const mds_strings[] = { [MDS_MITIGATION_OFF] = "Vulnerable", @@ -235,8 +236,13 @@ static void __init mds_select_mitigation(void) if (mds_mitigation == MDS_MITIGATION_FULL) { if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; + static_branch_enable(&mds_user_clear); + + if (mds_nosmt && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + cpu_smt_disable(false); } + pr_info("%s\n", mds_strings[mds_mitigation]); } @@ -252,6 +258,10 @@ static int __init mds_cmdline(char *str) mds_mitigation = MDS_MITIGATION_OFF; else if (!strcmp(str, "full")) mds_mitigation = MDS_MITIGATION_FULL; + else if (!strcmp(str, "full,nosmt")) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_nosmt = true; + } return 0; } From 98c4b3c2ee37ca65d72d23243b621006b69158fd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Apr 2019 10:00:14 -0500 Subject: [PATCH 65/77] x86/speculation: Move arch_smt_update() call to after mitigation decisions commit 7c3658b20194a5b3209a143f63bc9c643c6a3ae2 upstream. arch_smt_update() now has a dependency on both Spectre v2 and MDS mitigations. Move its initial call to after all the mitigation decisions have been made. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Acked-by: Jiri Kosina Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a8bef0a24390..a725105a49d1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -108,6 +108,8 @@ void __init check_bugs(void) mds_select_mitigation(); + arch_smt_update(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -632,9 +634,6 @@ specv2_set_mode: /* Set up IBPB and STIBP depending on the general spectre V2 command */ spectre_v2_user_select_mitigation(cmd); - - /* Enable STIBP if appropriate */ - arch_smt_update(); } static void update_stibp_msr(void * __unused) From f8a0bbe4bac879c0caf47ca699925ab29a4a9375 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Apr 2019 10:00:51 -0500 Subject: [PATCH 66/77] x86/speculation/mds: Add SMT warning message commit 39226ef02bfb43248b7db12a4fdccb39d95318e3 upstream. MDS is vulnerable with SMT. Make that clear with a one-time printk whenever SMT first gets enabled. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Acked-by: Jiri Kosina Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a725105a49d1..6a3fcacbb9a0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -667,6 +667,9 @@ static void update_indir_branch_cond(void) static_branch_disable(&switch_to_cond_stibp); } +#undef pr_fmt +#define pr_fmt(fmt) fmt + /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { @@ -687,6 +690,8 @@ static void update_mds_branch_idle(void) static_branch_disable(&mds_idle_clear); } +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -710,6 +715,8 @@ void arch_smt_update(void) switch (mds_mitigation) { case MDS_MITIGATION_FULL: case MDS_MITIGATION_VMWERV: + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + pr_warn_once(MDS_MSG_SMT); update_mds_branch_idle(); break; case MDS_MITIGATION_OFF: @@ -1139,6 +1146,7 @@ static int __init l1tf_cmdline(char *str) early_param("l1tf", l1tf_cmdline); #undef pr_fmt +#define pr_fmt(fmt) fmt #ifdef CONFIG_SYSFS From 450aa01a076d9aa5b459a7a33c74d95eca6a1e37 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 12 Apr 2019 17:50:57 -0400 Subject: [PATCH 67/77] x86/speculation/mds: Fix comment commit cae5ec342645746d617dd420d206e1588d47768a upstream. s/L1TF/MDS/ Signed-off-by: Boris Ostrovsky Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Reviewed-by: Josh Poimboeuf Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6a3fcacbb9a0..b32fb55d94e1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -218,7 +218,7 @@ static void x86_amd_ssb_disable(void) #undef pr_fmt #define pr_fmt(fmt) "MDS: " fmt -/* Default mitigation for L1TF-affected CPUs */ +/* Default mitigation for MDS-affected CPUs */ static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; static bool mds_nosmt __ro_after_init = false; From 3645b361be489077bd85458c40e47be791ca318c Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 12 Apr 2019 17:50:58 -0400 Subject: [PATCH 68/77] x86/speculation/mds: Print SMT vulnerable on MSBDS with mitigations off commit e2c3c94788b08891dcf3dbe608f9880523ecd71b upstream. This code is only for CPUs which are affected by MSBDS, but are *not* affected by the other two MDS issues. For such CPUs, enabling the mds_idle_clear mitigation is enough to mitigate SMT. However if user boots with 'mds=off' and still has SMT enabled, we should not report that SMT is mitigated: $cat /sys//devices/system/cpu/vulnerabilities/mds Vulnerable; SMT mitigated But rather: Vulnerable; SMT vulnerable Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Reviewed-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20190412215118.294906495@localhost.localdomain Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b32fb55d94e1..d81f55e4ca5c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1196,7 +1196,8 @@ static ssize_t mds_show_state(char *buf) if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], - sched_smt_active() ? "mitigated" : "disabled"); + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); } return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], From edda9c38930f5088a740952d5181bc1aa443e63c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:28 -0500 Subject: [PATCH 69/77] cpu/speculation: Add 'mitigations=' cmdline option commit 98af8452945c55652de68536afdde3b520fec429 upstream. Keeping track of the number of mitigations for all the CPU speculation bugs has become overwhelming for many users. It's getting more and more complicated to decide which mitigations are needed for a given architecture. Complicating matters is the fact that each arch tends to have its own custom way to mitigate the same vulnerability. Most users fall into a few basic categories: a) they want all mitigations off; b) they want all reasonable mitigations on, with SMT enabled even if it's vulnerable; or c) they want all reasonable mitigations on, with SMT disabled if vulnerable. Define a set of curated, arch-independent options, each of which is an aggregation of existing options: - mitigations=off: Disable all mitigations. - mitigations=auto: [default] Enable all the default mitigations, but leave SMT enabled, even if it's vulnerable. - mitigations=auto,nosmt: Enable all the default mitigations, disabling SMT if needed by a mitigation. Currently, these options are placeholders which don't actually do anything. They will be fleshed out in upcoming patches. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/b07a8ef9b7c5055c3a4637c87d07c296d5016fe0.1555085500.git.jpoimboe@redhat.com [bwh: Backported to 4.9: adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 24 ++++++++++++++++++++++++ include/linux/cpu.h | 24 ++++++++++++++++++++++++ kernel/cpu.c | 15 +++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4c134700c120..23af0afe24d1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2473,6 +2473,30 @@ bytes respectively. Such letter suffixes can also be entirely omitted. in the "bleeding edge" mini2440 support kernel at http://repo.or.cz/w/linux-2.6/mini2440.git + mitigations= + Control optional mitigations for CPU vulnerabilities. + This is a set of curated, arch-independent options, each + of which is an aggregation of existing arch-specific + options. + + off + Disable all optional CPU mitigations. This + improves system performance, but it may also + expose users to several CPU vulnerabilities. + + auto (default) + Mitigate all CPU vulnerabilities, but leave SMT + enabled, even if it's vulnerable. This is for + users who don't want to be surprised by SMT + getting disabled across kernel upgrades, or who + have other ways of avoiding SMT-based attacks. + This is the default behavior. + + auto,nosmt + Mitigate all CPU vulnerabilities, disabling SMT + if needed. This is for users who always want to + be fully mitigated, even if it means losing SMT. + mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this parameter allows control of the logging verbosity for diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 1f88e86193ae..166686209f2c 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -278,4 +278,28 @@ static inline void cpu_smt_check_topology_early(void) { } static inline void cpu_smt_check_topology(void) { } #endif +/* + * These are used for a global "mitigations=" cmdline option for toggling + * optional CPU mitigations. + */ +enum cpu_mitigations { + CPU_MITIGATIONS_OFF, + CPU_MITIGATIONS_AUTO, + CPU_MITIGATIONS_AUTO_NOSMT, +}; + +extern enum cpu_mitigations cpu_mitigations; + +/* mitigations=off */ +static inline bool cpu_mitigations_off(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_OFF; +} + +/* mitigations=auto,nosmt */ +static inline bool cpu_mitigations_auto_nosmt(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; +} + #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index a540ca031288..db1a0bc46c3e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2234,3 +2234,18 @@ void __init boot_cpu_hotplug_init(void) #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); } + +enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; + +static int __init mitigations_parse_cmdline(char *arg) +{ + if (!strcmp(arg, "off")) + cpu_mitigations = CPU_MITIGATIONS_OFF; + else if (!strcmp(arg, "auto")) + cpu_mitigations = CPU_MITIGATIONS_AUTO; + else if (!strcmp(arg, "auto,nosmt")) + cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; + + return 0; +} +early_param("mitigations", mitigations_parse_cmdline); From 1709284f082fbcb4a8e410242dcec3cc68389cda Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:29 -0500 Subject: [PATCH 70/77] x86/speculation: Support 'mitigations=' cmdline option commit d68be4c4d31295ff6ae34a8ddfaa4c1a8ff42812 upstream. Configure x86 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v2, Speculative Store Bypass, and L1TF. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Jiri Kosina (on x86) Reviewed-by: Jiri Kosina Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Waiman Long Cc: Andrea Arcangeli Cc: Jon Masters Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Tyler Hicks Cc: Linus Torvalds Cc: Randy Dunlap Cc: Steven Price Cc: Phil Auld Link: https://lkml.kernel.org/r/6616d0ae169308516cfdf5216bedd169f8a8291b.1555085500.git.jpoimboe@redhat.com [bwh: Backported to 4.9: adjust filenames, context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 16 +++++++++++----- arch/x86/kernel/cpu/bugs.c | 11 +++++++++-- arch/x86/mm/kaiser.c | 4 +++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 23af0afe24d1..5f81df18b2be 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2474,15 +2474,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted. http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - Control optional mitigations for CPU vulnerabilities. - This is a set of curated, arch-independent options, each - of which is an aggregation of existing arch-specific - options. + [X86] Control optional mitigations for CPU + vulnerabilities. This is a set of curated, + arch-independent options, each of which is an + aggregation of existing arch-specific options. off Disable all optional CPU mitigations. This improves system performance, but it may also expose users to several CPU vulnerabilities. + Equivalent to: nopti [X86] + nospectre_v2 [X86] + spectre_v2_user=off [X86] + spec_store_bypass_disable=off [X86] + l1tf=off [X86] auto (default) Mitigate all CPU vulnerabilities, but leave SMT @@ -2490,12 +2495,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. users who don't want to be surprised by SMT getting disabled across kernel upgrades, or who have other ways of avoiding SMT-based attacks. - This is the default behavior. + Equivalent to: (default behavior) auto,nosmt Mitigate all CPU vulnerabilities, disabling SMT if needed. This is for users who always want to be fully mitigated, even if it means losing SMT. + Equivalent to: l1tf=flush,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d81f55e4ca5c..c2e07be3773c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -500,7 +500,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) char arg[20]; int ret, i; - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") || + cpu_mitigations_off()) return SPECTRE_V2_CMD_NONE; ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); @@ -764,7 +765,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) char arg[20]; int ret, i; - if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") || + cpu_mitigations_off()) { return SPEC_STORE_BYPASS_CMD_NONE; } else { ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", @@ -1085,6 +1087,11 @@ static void __init l1tf_select_mitigation(void) if (!boot_cpu_has_bug(X86_BUG_L1TF)) return; + if (cpu_mitigations_off()) + l1tf_mitigation = L1TF_MITIGATION_OFF; + else if (cpu_mitigations_auto_nosmt()) + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; + override_cache_bits(&boot_cpu_data); switch (l1tf_mitigation) { diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index 3f729e20f0e3..12522dbae615 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -9,6 +9,7 @@ #include #include #include +#include #undef pr_fmt #define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt @@ -297,7 +298,8 @@ void __init kaiser_check_boottime_disable(void) goto skip; } - if (cmdline_find_option_bool(boot_command_line, "nopti")) + if (cmdline_find_option_bool(boot_command_line, "nopti") || + cpu_mitigations_off()) goto disable; skip: From 025b9cf2a0fcaf8d971b8bea66f661cf3751c245 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Apr 2019 16:39:02 -0500 Subject: [PATCH 71/77] x86/speculation/mds: Add 'mitigations=' support for MDS commit 5c14068f87d04adc73ba3f41c2a303d3c3d1fa12 upstream. Add MDS to the new 'mitigations=' cmdline option. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 ++ arch/x86/kernel/cpu/bugs.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5f81df18b2be..55a9bbbcf5e1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2488,6 +2488,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86] l1tf=off [X86] + mds=off [X86] auto (default) Mitigate all CPU vulnerabilities, but leave SMT @@ -2502,6 +2503,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. if needed. This is for users who always want to be fully mitigated, even if it means losing SMT. Equivalent to: l1tf=flush,nosmt [X86] + mds=full,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c2e07be3773c..6eef57e83af7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -230,7 +230,7 @@ static const char * const mds_strings[] = { static void __init mds_select_mitigation(void) { - if (!boot_cpu_has_bug(X86_BUG_MDS)) { + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { mds_mitigation = MDS_MITIGATION_OFF; return; } @@ -241,7 +241,8 @@ static void __init mds_select_mitigation(void) static_branch_enable(&mds_user_clear); - if (mds_nosmt && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && + (mds_nosmt || cpu_mitigations_auto_nosmt())) cpu_smt_disable(false); } From 96c06cda5b4bdc6a3a9a8f8adc46c86077a70ee0 Mon Sep 17 00:00:00 2001 From: speck for Pawan Gupta Date: Mon, 6 May 2019 12:23:50 -0700 Subject: [PATCH 72/77] x86/mds: Add MDSUM variant to the MDS documentation commit e672f8bf71c66253197e503f75c771dd28ada4a0 upstream. Updated the documentation for a new CVE-2019-11091 Microarchitectural Data Sampling Uncacheable Memory (MDSUM) which is a variant of Microarchitectural Data Sampling (MDS). MDS is a family of side channel attacks on internal buffers in Intel CPUs. MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from memory that takes a fault or assist can leave data in a microarchitectural structure that may later be observed using one of the same methods used by MSBDS, MFBDS or MLPDS. There are no new code changes expected for MDSUM. The existing mitigation for MDS applies to MDSUM as well. Signed-off-by: Pawan Gupta Signed-off-by: Thomas Gleixner Reviewed-by: Tyler Hicks Reviewed-by: Jon Masters [bwh: Backported to 4.9: adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/hw-vuln/mds.rst | 5 +++-- Documentation/x86/mds.rst | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/hw-vuln/mds.rst b/Documentation/hw-vuln/mds.rst index aec9e49256b5..5e92eca5c0a7 100644 --- a/Documentation/hw-vuln/mds.rst +++ b/Documentation/hw-vuln/mds.rst @@ -32,11 +32,12 @@ Related CVEs The following CVE entries are related to the MDS vulnerability: - ============== ===== ============================================== + ============== ===== =================================================== CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling - ============== ===== ============================================== + CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory + ============== ===== =================================================== Problem ------- diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 3d6f943f1afb..979945be257a 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -12,6 +12,7 @@ on internal buffers in Intel CPUs. The variants are: - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) + - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091) MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a dependent load (store-to-load forwarding) as an optimization. The forward @@ -38,6 +39,10 @@ faulting or assisting loads under certain conditions, which again can be exploited eventually. Load ports are shared between Hyper-Threads so cross thread leakage is possible. +MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from +memory that takes a fault or assist can leave data in a microarchitectural +structure that may later be observed using one of the same methods used by +MSBDS, MFBDS or MLPDS. Exposure assumptions -------------------- From da360f1f5eb43e0d71009bab3be53c7a06d40caf Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 6 May 2019 23:52:58 +0000 Subject: [PATCH 73/77] Documentation: Correct the possible MDS sysfs values commit ea01668f9f43021b28b3f4d5ffad50106a1e1301 upstream. Adjust the last two rows in the table that display possible values when MDS mitigation is enabled. They both were slightly innacurate. In addition, convert the table of possible values and their descriptions to a list-table. The simple table format uses the top border of equals signs to determine cell width which resulted in the first column being far too wide in comparison to the second column that contained the majority of the text. Signed-off-by: Tyler Hicks Signed-off-by: Thomas Gleixner [bwh: Backported to 4.9: adjust filename] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/hw-vuln/mds.rst | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/Documentation/hw-vuln/mds.rst b/Documentation/hw-vuln/mds.rst index 5e92eca5c0a7..daf6fdac49a3 100644 --- a/Documentation/hw-vuln/mds.rst +++ b/Documentation/hw-vuln/mds.rst @@ -95,22 +95,19 @@ mitigations are active. The relevant sysfs file is: The possible values in this file are: - ========================================= ================================= - 'Not affected' The processor is not vulnerable + .. list-table:: - 'Vulnerable' The processor is vulnerable, - but no mitigation enabled + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The processor is vulnerable but microcode is not updated. - 'Vulnerable: Clear CPU buffers attempted' The processor is vulnerable but - microcode is not updated. - The mitigation is enabled on a - best effort basis. - See :ref:`vmwerv` - - 'Mitigation: CPU buffer clear' The processor is vulnerable and the - CPU buffer clearing mitigation is - enabled. - ========================================= ================================= + The mitigation is enabled on a best effort basis. See :ref:`vmwerv` + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. If the processor is vulnerable then the following information is appended to the above information: From 2a099011de8abebac475a90dad1835c60dfca88c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 7 May 2019 15:05:22 -0500 Subject: [PATCH 74/77] x86/speculation/mds: Fix documentation typo commit 95310e348a321b45fb746c176961d4da72344282 upstream. Fix a minor typo in the MDS documentation: "eanbled" -> "enabled". Reported-by: Jeff Bastian Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/x86/mds.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst index 979945be257a..534e9baa4e1d 100644 --- a/Documentation/x86/mds.rst +++ b/Documentation/x86/mds.rst @@ -116,7 +116,7 @@ Kernel internal mitigation modes off Mitigation is disabled. Either the CPU is not affected or mds=off is supplied on the kernel command line - full Mitigation is eanbled. CPU is affected and MD_CLEAR is + full Mitigation is enabled. CPU is affected and MD_CLEAR is advertised in CPUID. vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not From 626743f43da44598076019a82193caf49dca1fde Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 27 Mar 2017 14:20:08 +0200 Subject: [PATCH 75/77] x86: stop exporting msr-index.h to userland commit 25dc1d6cc3082aab293e5dad47623b550f7ddd2a upstream. Even if this file was not in an uapi directory, it was exported because it was listed in the Kbuild file. Fixes: b72e7464e4cf ("x86/uapi: Do not export as part of the user API headers") Suggested-by: Borislav Petkov Signed-off-by: Nicolas Dichtel Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Signed-off-by: Masahiro Yamada Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/Kbuild | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index 3dec769cadf7..1c532b3f18ea 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -27,7 +27,6 @@ header-y += ldt.h header-y += mce.h header-y += mman.h header-y += msgbuf.h -header-y += msr-index.h header-y += msr.h header-y += mtrr.h header-y += param.h From 192d1975450e51c1abb725343a7e19a4d61e30bd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Mar 2019 17:47:43 -0700 Subject: [PATCH 76/77] x86/cpu/bugs: Use __initconst for 'const' init data commit 1de7edbb59c8f1b46071f66c5c97b8a59569eb51 upstream. Some of the recently added const tables use __initdata which causes section attribute conflicts. Use __initconst instead. Fixes: fa1202ef2243 ("x86/speculation: Add command line control") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190330004743.29541-9-andi@firstfloor.org Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6eef57e83af7..16970c39baea 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -338,7 +338,7 @@ static const struct { const char *option; enum spectre_v2_user_cmd cmd; bool secure; -} v2_user_options[] __initdata = { +} v2_user_options[] __initconst = { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, @@ -475,7 +475,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] __initdata = { +} mitigation_options[] __initconst = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -752,7 +752,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] __initdata = { +} ssb_mitigation_options[] __initconst = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ From ffe8cffc8be1ae47c08cbc3571bed6b5b0fa53ad Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 May 2019 19:19:42 +0200 Subject: [PATCH 77/77] Linux 4.9.176 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e52b0579e176..92fe701e5582 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 175 +SUBLEVEL = 176 EXTRAVERSION = NAME = Roaring Lionus