mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-11 13:27:06 +09:00
[x86] KVM: Add mitigation for Machine Check Error on Page Size Change
(aka iTLB multi-hit, CVE-2018-12207)
This commit is contained in:
11
debian/changelog
vendored
11
debian/changelog
vendored
@@ -36,6 +36,17 @@ linux (5.3.7-2) UNRELEASED; urgency=medium
|
||||
- x86/tsx: Add config options to set tsx=on|off|auto
|
||||
TSX is now disabled by default; see
|
||||
Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
|
||||
* [x86] KVM: Add mitigation for Machine Check Error on Page Size Change
|
||||
(aka iTLB multi-hit, CVE-2018-12207):
|
||||
- kvm: x86, powerpc: do not allow clearing largepages debugfs entry
|
||||
- KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active
|
||||
- x86/bugs: Add ITLB_MULTIHIT bug infrastructure
|
||||
- x86/cpu: Add Tremont to the cpu vulnerability whitelist
|
||||
- cpu/speculation: Uninline and export CPU mitigations helpers
|
||||
- kvm: mmu: ITLB_MULTIHIT mitigation
|
||||
- kvm: Add helper function for creating VM worker threads
|
||||
- kvm: x86: mmu: Recovery of shattered NX large pages
|
||||
- Documentation: Add ITLB_MULTIHIT documentation
|
||||
|
||||
[ Bastian Blank ]
|
||||
* [amd64/cloud-amd64] Re-enable RTC drivers. (closes: #931341)
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Sun, 27 Oct 2019 16:23:23 +0100
|
||||
Subject: KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is
|
||||
active
|
||||
Origin: https://git.kernel.org/linus/9167ab79936206118cc60e47dcb926c3489f3bd5
|
||||
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2018-12207
|
||||
|
||||
VMX already does so if the host has SMEP, in order to support the combination of
|
||||
CR0.WP=1 and CR4.SMEP=1. However, it is perfectly safe to always do so, and in
|
||||
fact VMX already ends up running with EFER.NXE=1 on old processors that lack the
|
||||
"load EFER" controls, because it may help avoid a slow MSR write. Removing
|
||||
all the conditionals simplifies the code.
|
||||
|
||||
SVM does not have similar code, but it should since recent AMD processors do
|
||||
support SMEP. So this patch also makes the code for the two vendors more similar
|
||||
while fixing NPT=0, CR0.WP=1 and CR4.SMEP=1 on AMD processors.
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Cc: Joerg Roedel <jroedel@suse.de>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
arch/x86/kvm/svm.c | 10 ++++++++--
|
||||
arch/x86/kvm/vmx/vmx.c | 14 +++-----------
|
||||
2 files changed, 11 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/arch/x86/kvm/svm.c
|
||||
+++ b/arch/x86/kvm/svm.c
|
||||
@@ -736,8 +736,14 @@ static int get_npt_level(struct kvm_vcpu
|
||||
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
||||
{
|
||||
vcpu->arch.efer = efer;
|
||||
- if (!npt_enabled && !(efer & EFER_LMA))
|
||||
- efer &= ~EFER_LME;
|
||||
+
|
||||
+ if (!npt_enabled) {
|
||||
+ /* Shadow paging assumes NX to be available. */
|
||||
+ efer |= EFER_NX;
|
||||
+
|
||||
+ if (!(efer & EFER_LMA))
|
||||
+ efer &= ~EFER_LME;
|
||||
+ }
|
||||
|
||||
to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
|
||||
mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
|
||||
--- a/arch/x86/kvm/vmx/vmx.c
|
||||
+++ b/arch/x86/kvm/vmx/vmx.c
|
||||
@@ -897,17 +897,9 @@ static bool update_transition_efer(struc
|
||||
u64 guest_efer = vmx->vcpu.arch.efer;
|
||||
u64 ignore_bits = 0;
|
||||
|
||||
- if (!enable_ept) {
|
||||
- /*
|
||||
- * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
|
||||
- * host CPUID is more efficient than testing guest CPUID
|
||||
- * or CR4. Host SMEP is anyway a requirement for guest SMEP.
|
||||
- */
|
||||
- if (boot_cpu_has(X86_FEATURE_SMEP))
|
||||
- guest_efer |= EFER_NX;
|
||||
- else if (!(guest_efer & EFER_NX))
|
||||
- ignore_bits |= EFER_NX;
|
||||
- }
|
||||
+ /* Shadow paging assumes NX to be available. */
|
||||
+ if (!enable_ept)
|
||||
+ guest_efer |= EFER_NX;
|
||||
|
||||
/*
|
||||
* LMA and LME handled by hardware; SCE meaningless outside long mode.
|
||||
@@ -0,0 +1,119 @@
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon, 30 Sep 2019 18:48:44 +0200
|
||||
Subject: kvm: x86, powerpc: do not allow clearing largepages debugfs entry
|
||||
Origin: https://git.kernel.org/linus/833b45de69a6016c4b0cebe6765d526a31a81580
|
||||
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2018-12207
|
||||
|
||||
The largepages debugfs entry is incremented/decremented as shadow
|
||||
pages are created or destroyed. Clearing it will result in an
|
||||
underflow, which is harmless to KVM but ugly (and could be
|
||||
misinterpreted by tools that use debugfs information), so make
|
||||
this particular statistic read-only.
|
||||
|
||||
Cc: kvm-ppc@vger.kernel.org
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
arch/powerpc/kvm/book3s.c | 8 ++++----
|
||||
arch/x86/kvm/x86.c | 6 +++---
|
||||
include/linux/kvm_host.h | 2 ++
|
||||
virt/kvm/kvm_main.c | 10 +++++++---
|
||||
4 files changed, 16 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/arch/powerpc/kvm/book3s.c
|
||||
+++ b/arch/powerpc/kvm/book3s.c
|
||||
@@ -36,8 +36,8 @@
|
||||
#include "book3s.h"
|
||||
#include "trace.h"
|
||||
|
||||
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
|
||||
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
|
||||
+#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
|
||||
+#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
|
||||
|
||||
/* #define EXIT_DEBUG */
|
||||
|
||||
@@ -69,8 +69,8 @@ struct kvm_stats_debugfs_item debugfs_en
|
||||
{ "pthru_all", VCPU_STAT(pthru_all) },
|
||||
{ "pthru_host", VCPU_STAT(pthru_host) },
|
||||
{ "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) },
|
||||
- { "largepages_2M", VM_STAT(num_2M_pages) },
|
||||
- { "largepages_1G", VM_STAT(num_1G_pages) },
|
||||
+ { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) },
|
||||
+ { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
--- a/arch/x86/kvm/x86.c
|
||||
+++ b/arch/x86/kvm/x86.c
|
||||
@@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~
|
||||
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
|
||||
#endif
|
||||
|
||||
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
|
||||
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
|
||||
+#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
|
||||
+#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
|
||||
|
||||
#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
|
||||
KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
|
||||
@@ -212,7 +212,7 @@ struct kvm_stats_debugfs_item debugfs_en
|
||||
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
|
||||
{ "mmu_unsync", VM_STAT(mmu_unsync) },
|
||||
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
|
||||
- { "largepages", VM_STAT(lpages) },
|
||||
+ { "largepages", VM_STAT(lpages, .mode = 0444) },
|
||||
{ "max_mmu_page_hash_collisions",
|
||||
VM_STAT(max_mmu_page_hash_collisions) },
|
||||
{ NULL }
|
||||
--- a/include/linux/kvm_host.h
|
||||
+++ b/include/linux/kvm_host.h
|
||||
@@ -1090,6 +1090,7 @@ enum kvm_stat_kind {
|
||||
|
||||
struct kvm_stat_data {
|
||||
int offset;
|
||||
+ int mode;
|
||||
struct kvm *kvm;
|
||||
};
|
||||
|
||||
@@ -1097,6 +1098,7 @@ struct kvm_stats_debugfs_item {
|
||||
const char *name;
|
||||
int offset;
|
||||
enum kvm_stat_kind kind;
|
||||
+ int mode;
|
||||
};
|
||||
extern struct kvm_stats_debugfs_item debugfs_entries[];
|
||||
extern struct dentry *kvm_debugfs_dir;
|
||||
--- a/virt/kvm/kvm_main.c
|
||||
+++ b/virt/kvm/kvm_main.c
|
||||
@@ -617,8 +617,9 @@ static int kvm_create_vm_debugfs(struct
|
||||
|
||||
stat_data->kvm = kvm;
|
||||
stat_data->offset = p->offset;
|
||||
+ stat_data->mode = p->mode ? p->mode : 0644;
|
||||
kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
|
||||
- debugfs_create_file(p->name, 0644, kvm->debugfs_dentry,
|
||||
+ debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry,
|
||||
stat_data, stat_fops_per_vm[p->kind]);
|
||||
}
|
||||
return 0;
|
||||
@@ -3930,7 +3931,9 @@ static int kvm_debugfs_open(struct inode
|
||||
if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
|
||||
return -ENOENT;
|
||||
|
||||
- if (simple_attr_open(inode, file, get, set, fmt)) {
|
||||
+ if (simple_attr_open(inode, file, get,
|
||||
+ stat_data->mode & S_IWUGO ? set : NULL,
|
||||
+ fmt)) {
|
||||
kvm_put_kvm(stat_data->kvm);
|
||||
return -ENOMEM;
|
||||
}
|
||||
@@ -4178,7 +4181,8 @@ static void kvm_init_debug(void)
|
||||
|
||||
kvm_debugfs_num_entries = 0;
|
||||
for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
|
||||
- debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
|
||||
+ int mode = p->mode ? p->mode : 0644;
|
||||
+ debugfs_create_file(p->name, mode, kvm_debugfs_dir,
|
||||
(void *)(long)p->offset,
|
||||
stat_fops[p->kind]);
|
||||
}
|
||||
255
debian/patches/bugfix/x86/itlb_multihit/0001-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch
vendored
Normal file
255
debian/patches/bugfix/x86/itlb_multihit/0001-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch
vendored
Normal file
@@ -0,0 +1,255 @@
|
||||
From: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
|
||||
Date: Mon, 4 Nov 2019 12:22:01 +0100
|
||||
Subject: [1/7] x86/bugs: Add ITLB_MULTIHIT bug infrastructure
|
||||
Origin: https://git.kernel.org/linus/db4d30fbb71b47e4ecb11c4efa5d8aad4b03dfae
|
||||
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2018-12207
|
||||
|
||||
Some processors may incur a machine check error possibly resulting in an
|
||||
unrecoverable CPU lockup when an instruction fetch encounters a TLB
|
||||
multi-hit in the instruction TLB. This can occur when the page size is
|
||||
changed along with either the physical address or cache type. The relevant
|
||||
erratum can be found here:
|
||||
|
||||
https://bugzilla.kernel.org/show_bug.cgi?id=205195
|
||||
|
||||
There are other processors affected for which the erratum does not fully
|
||||
disclose the impact.
|
||||
|
||||
This issue affects both bare-metal x86 page tables and EPT.
|
||||
|
||||
It can be mitigated by either eliminating the use of large pages or by
|
||||
using careful TLB invalidations when changing the page size in the page
|
||||
tables.
|
||||
|
||||
Just like Spectre, Meltdown, L1TF and MDS, a new bit has been allocated in
|
||||
MSR_IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) and will be set on CPUs which
|
||||
are mitigated against this issue.
|
||||
|
||||
Signed-off-by: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
|
||||
Co-developed-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
|
||||
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[bwh: Backported to 5.3:
|
||||
- No support for ATOM_AIRMONT_NP
|
||||
- Adjust context]
|
||||
---
|
||||
.../ABI/testing/sysfs-devices-system-cpu | 1 +
|
||||
arch/x86/include/asm/cpufeatures.h | 1 +
|
||||
arch/x86/include/asm/msr-index.h | 7 ++
|
||||
arch/x86/kernel/cpu/bugs.c | 13 ++++
|
||||
arch/x86/kernel/cpu/common.c | 65 ++++++++++---------
|
||||
drivers/base/cpu.c | 8 +++
|
||||
include/linux/cpu.h | 2 +
|
||||
7 files changed, 67 insertions(+), 30 deletions(-)
|
||||
|
||||
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
|
||||
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
|
||||
@@ -487,6 +487,7 @@ What: /sys/devices/system/cpu/vulnerabi
|
||||
/sys/devices/system/cpu/vulnerabilities/l1tf
|
||||
/sys/devices/system/cpu/vulnerabilities/mds
|
||||
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
|
||||
+ /sys/devices/system/cpu/vulnerabilities/itlb_multihit
|
||||
Date: January 2018
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description: Information about CPU vulnerabilities
|
||||
--- a/arch/x86/include/asm/cpufeatures.h
|
||||
+++ b/arch/x86/include/asm/cpufeatures.h
|
||||
@@ -398,5 +398,6 @@
|
||||
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
|
||||
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
|
||||
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
|
||||
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
|
||||
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
--- a/arch/x86/include/asm/msr-index.h
|
||||
+++ b/arch/x86/include/asm/msr-index.h
|
||||
@@ -93,6 +93,13 @@
|
||||
* Microarchitectural Data
|
||||
* Sampling (MDS) vulnerabilities.
|
||||
*/
|
||||
+#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /*
|
||||
+ * The processor is not susceptible to a
|
||||
+ * machine check error due to modifying the
|
||||
+ * code page size along with either the
|
||||
+ * physical address or cache type
|
||||
+ * without TLB invalidation.
|
||||
+ */
|
||||
#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
|
||||
#define ARCH_CAP_TAA_NO BIT(8) /*
|
||||
* Not susceptible to
|
||||
--- a/arch/x86/kernel/cpu/bugs.c
|
||||
+++ b/arch/x86/kernel/cpu/bugs.c
|
||||
@@ -1419,6 +1419,11 @@ static ssize_t l1tf_show_state(char *buf
|
||||
}
|
||||
#endif
|
||||
|
||||
+static ssize_t itlb_multihit_show_state(char *buf)
|
||||
+{
|
||||
+ return sprintf(buf, "Processor vulnerable\n");
|
||||
+}
|
||||
+
|
||||
static ssize_t mds_show_state(char *buf)
|
||||
{
|
||||
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
|
||||
@@ -1524,6 +1529,9 @@ static ssize_t cpu_show_common(struct de
|
||||
case X86_BUG_TAA:
|
||||
return tsx_async_abort_show_state(buf);
|
||||
|
||||
+ case X86_BUG_ITLB_MULTIHIT:
|
||||
+ return itlb_multihit_show_state(buf);
|
||||
+
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -1565,4 +1573,9 @@ ssize_t cpu_show_tsx_async_abort(struct
|
||||
{
|
||||
return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
|
||||
}
|
||||
+
|
||||
+ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
+{
|
||||
+ return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
|
||||
+}
|
||||
#endif
|
||||
--- a/arch/x86/kernel/cpu/common.c
|
||||
+++ b/arch/x86/kernel/cpu/common.c
|
||||
@@ -1016,13 +1016,14 @@ static void identify_cpu_without_cpuid(s
|
||||
#endif
|
||||
}
|
||||
|
||||
-#define NO_SPECULATION BIT(0)
|
||||
-#define NO_MELTDOWN BIT(1)
|
||||
-#define NO_SSB BIT(2)
|
||||
-#define NO_L1TF BIT(3)
|
||||
-#define NO_MDS BIT(4)
|
||||
-#define MSBDS_ONLY BIT(5)
|
||||
-#define NO_SWAPGS BIT(6)
|
||||
+#define NO_SPECULATION BIT(0)
|
||||
+#define NO_MELTDOWN BIT(1)
|
||||
+#define NO_SSB BIT(2)
|
||||
+#define NO_L1TF BIT(3)
|
||||
+#define NO_MDS BIT(4)
|
||||
+#define MSBDS_ONLY BIT(5)
|
||||
+#define NO_SWAPGS BIT(6)
|
||||
+#define NO_ITLB_MULTIHIT BIT(7)
|
||||
|
||||
#define VULNWL(_vendor, _family, _model, _whitelist) \
|
||||
{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
|
||||
@@ -1043,26 +1044,26 @@ static const __initconst struct x86_cpu_
|
||||
VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
|
||||
|
||||
/* Intel Family 6 */
|
||||
- VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION),
|
||||
- VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION),
|
||||
- VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION),
|
||||
- VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
|
||||
- VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
|
||||
-
|
||||
- VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
|
||||
- VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
|
||||
- VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
|
||||
- VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
|
||||
- VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
|
||||
- VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
|
||||
+ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
|
||||
+
|
||||
+ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
|
||||
VULNWL_INTEL(CORE_YONAH, NO_SSB),
|
||||
|
||||
- VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
|
||||
+ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
|
||||
- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS),
|
||||
- VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS),
|
||||
- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS),
|
||||
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
|
||||
/*
|
||||
* Technically, swapgs isn't serializing on AMD (despite it previously
|
||||
@@ -1073,14 +1074,14 @@ static const __initconst struct x86_cpu_
|
||||
*/
|
||||
|
||||
/* AMD Family 0xf - 0x12 */
|
||||
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
|
||||
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
|
||||
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
|
||||
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
|
||||
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
|
||||
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
|
||||
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
|
||||
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
|
||||
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
{}
|
||||
};
|
||||
|
||||
@@ -1105,6 +1106,10 @@ static void __init cpu_set_bug_bits(stru
|
||||
{
|
||||
u64 ia32_cap = x86_read_arch_cap_msr();
|
||||
|
||||
+ /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
|
||||
+ if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
|
||||
+ setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
|
||||
+
|
||||
if (cpu_matches(NO_SPECULATION))
|
||||
return;
|
||||
|
||||
--- a/drivers/base/cpu.c
|
||||
+++ b/drivers/base/cpu.c
|
||||
@@ -561,6 +561,12 @@ ssize_t __weak cpu_show_tsx_async_abort(
|
||||
return sprintf(buf, "Not affected\n");
|
||||
}
|
||||
|
||||
+ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
|
||||
+ struct device_attribute *attr, char *buf)
|
||||
+{
|
||||
+ return sprintf(buf, "Not affected\n");
|
||||
+}
|
||||
+
|
||||
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
|
||||
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
|
||||
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
|
||||
@@ -568,6 +574,7 @@ static DEVICE_ATTR(spec_store_bypass, 04
|
||||
static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
|
||||
static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
|
||||
static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
|
||||
+static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
|
||||
|
||||
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
|
||||
&dev_attr_meltdown.attr,
|
||||
@@ -577,6 +584,7 @@ static struct attribute *cpu_root_vulner
|
||||
&dev_attr_l1tf.attr,
|
||||
&dev_attr_mds.attr,
|
||||
&dev_attr_tsx_async_abort.attr,
|
||||
+ &dev_attr_itlb_multihit.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
--- a/include/linux/cpu.h
|
||||
+++ b/include/linux/cpu.h
|
||||
@@ -62,6 +62,8 @@ extern ssize_t cpu_show_mds(struct devic
|
||||
extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf);
|
||||
+extern ssize_t cpu_show_itlb_multihit(struct device *dev,
|
||||
+ struct device_attribute *attr, char *buf);
|
||||
|
||||
extern __printf(4, 5)
|
||||
struct device *cpu_device_create(struct device *parent, void *drvdata,
|
||||
@@ -0,0 +1,30 @@
|
||||
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
|
||||
Date: Mon, 4 Nov 2019 12:22:01 +0100
|
||||
Subject: [2/7] x86/cpu: Add Tremont to the cpu vulnerability whitelist
|
||||
Origin: https://git.kernel.org/linus/cad14885a8d32c1c0d8eaa7bf5c0152a22b6080e
|
||||
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2018-12207
|
||||
|
||||
Add the new cpu family ATOM_TREMONT_D to the cpu vulnerability
|
||||
whitelist. ATOM_TREMONT_D is not affected by X86_BUG_ITLB_MULTIHIT.
|
||||
|
||||
ATOM_TREMONT_D might have mitigations against other issues as well, but
|
||||
only the ITLB multihit mitigation is confirmed at this point.
|
||||
|
||||
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
arch/x86/kernel/cpu/common.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
--- a/arch/x86/kernel/cpu/common.c
|
||||
+++ b/arch/x86/kernel/cpu/common.c
|
||||
@@ -1073,6 +1073,8 @@ static const __initconst struct x86_cpu_
|
||||
* good enough for our purposes.
|
||||
*/
|
||||
|
||||
+ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT),
|
||||
+
|
||||
/* AMD Family 0xf - 0x12 */
|
||||
VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
|
||||
@@ -0,0 +1,97 @@
|
||||
From: Tyler Hicks <tyhicks@canonical.com>
|
||||
Date: Mon, 4 Nov 2019 12:22:02 +0100
|
||||
Subject: [3/7] cpu/speculation: Uninline and export CPU mitigations helpers
|
||||
Origin: https://git.kernel.org/linus/731dc9df975a5da21237a18c3384f811a7a41cc6
|
||||
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2018-12207
|
||||
|
||||
A kernel module may need to check the value of the "mitigations=" kernel
|
||||
command line parameter as part of its setup when the module needs
|
||||
to perform software mitigations for a CPU flaw.
|
||||
|
||||
Uninline and export the helper functions surrounding the cpu_mitigations
|
||||
enum to allow for their usage from a module.
|
||||
|
||||
Lastly, privatize the enum and cpu_mitigations variable since the value of
|
||||
cpu_mitigations can be checked with the exported helper functions.
|
||||
|
||||
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/cpu.h | 25 ++-----------------------
|
||||
kernel/cpu.c | 27 ++++++++++++++++++++++++++-
|
||||
2 files changed, 28 insertions(+), 24 deletions(-)
|
||||
|
||||
--- a/include/linux/cpu.h
|
||||
+++ b/include/linux/cpu.h
|
||||
@@ -216,28 +216,7 @@ static inline int cpuhp_smt_enable(void)
|
||||
static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
|
||||
#endif
|
||||
|
||||
-/*
|
||||
- * These are used for a global "mitigations=" cmdline option for toggling
|
||||
- * optional CPU mitigations.
|
||||
- */
|
||||
-enum cpu_mitigations {
|
||||
- CPU_MITIGATIONS_OFF,
|
||||
- CPU_MITIGATIONS_AUTO,
|
||||
- CPU_MITIGATIONS_AUTO_NOSMT,
|
||||
-};
|
||||
-
|
||||
-extern enum cpu_mitigations cpu_mitigations;
|
||||
-
|
||||
-/* mitigations=off */
|
||||
-static inline bool cpu_mitigations_off(void)
|
||||
-{
|
||||
- return cpu_mitigations == CPU_MITIGATIONS_OFF;
|
||||
-}
|
||||
-
|
||||
-/* mitigations=auto,nosmt */
|
||||
-static inline bool cpu_mitigations_auto_nosmt(void)
|
||||
-{
|
||||
- return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
|
||||
-}
|
||||
+extern bool cpu_mitigations_off(void);
|
||||
+extern bool cpu_mitigations_auto_nosmt(void);
|
||||
|
||||
#endif /* _LINUX_CPU_H_ */
|
||||
--- a/kernel/cpu.c
|
||||
+++ b/kernel/cpu.c
|
||||
@@ -2339,7 +2339,18 @@ void __init boot_cpu_hotplug_init(void)
|
||||
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
|
||||
}
|
||||
|
||||
-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
|
||||
+/*
|
||||
+ * These are used for a global "mitigations=" cmdline option for toggling
|
||||
+ * optional CPU mitigations.
|
||||
+ */
|
||||
+enum cpu_mitigations {
|
||||
+ CPU_MITIGATIONS_OFF,
|
||||
+ CPU_MITIGATIONS_AUTO,
|
||||
+ CPU_MITIGATIONS_AUTO_NOSMT,
|
||||
+};
|
||||
+
|
||||
+static enum cpu_mitigations cpu_mitigations __ro_after_init =
|
||||
+ CPU_MITIGATIONS_AUTO;
|
||||
|
||||
static int __init mitigations_parse_cmdline(char *arg)
|
||||
{
|
||||
@@ -2356,3 +2367,17 @@ static int __init mitigations_parse_cmdl
|
||||
return 0;
|
||||
}
|
||||
early_param("mitigations", mitigations_parse_cmdline);
|
||||
+
|
||||
+/* mitigations=off */
|
||||
+bool cpu_mitigations_off(void)
|
||||
+{
|
||||
+ return cpu_mitigations == CPU_MITIGATIONS_OFF;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(cpu_mitigations_off);
|
||||
+
|
||||
+/* mitigations=auto,nosmt */
|
||||
+bool cpu_mitigations_auto_nosmt(void)
|
||||
+{
|
||||
+ return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
|
||||
490
debian/patches/bugfix/x86/itlb_multihit/0004-kvm-mmu-ITLB_MULTIHIT-mitigation.patch
vendored
Normal file
490
debian/patches/bugfix/x86/itlb_multihit/0004-kvm-mmu-ITLB_MULTIHIT-mitigation.patch
vendored
Normal file
@@ -0,0 +1,490 @@
|
||||
From: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Date: Mon, 4 Nov 2019 12:22:02 +0100
|
||||
Subject: [4/7] kvm: mmu: ITLB_MULTIHIT mitigation
|
||||
Origin: https://git.kernel.org/linus/b8e8c8303ff28c61046a4d0f6ea99aea609a7dc0
|
||||
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2018-12207
|
||||
|
||||
With some Intel processors, putting the same virtual address in the TLB
|
||||
as both a 4 KiB and 2 MiB page can confuse the instruction fetch unit
|
||||
and cause the processor to issue a machine check resulting in a CPU lockup.
|
||||
|
||||
Unfortunately when EPT page tables use huge pages, it is possible for a
|
||||
malicious guest to cause this situation.
|
||||
|
||||
Add a knob to mark huge pages as non-executable. When the nx_huge_pages
|
||||
parameter is enabled (and we are using EPT), all huge pages are marked as
|
||||
NX. If the guest attempts to execute in one of those pages, the page is
|
||||
broken down into 4K pages, which are then marked executable.
|
||||
|
||||
This is not an issue for shadow paging (except nested EPT), because then
|
||||
the host is in control of TLB flushes and the problematic situation cannot
|
||||
happen. With nested EPT, again the nested guest can cause problems, so shadow
|
||||
and direct EPT is treated in the same way.
|
||||
|
||||
[ tglx: Fixup default to auto and massage wording a bit ]
|
||||
|
||||
Originally-by: Junaid Shahid <junaids@google.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
.../admin-guide/kernel-parameters.txt | 19 +++
|
||||
arch/x86/include/asm/kvm_host.h | 2 +
|
||||
arch/x86/kernel/cpu/bugs.c | 13 +-
|
||||
arch/x86/kvm/mmu.c | 141 +++++++++++++++++-
|
||||
arch/x86/kvm/paging_tmpl.h | 29 +++-
|
||||
arch/x86/kvm/x86.c | 9 ++
|
||||
6 files changed, 200 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -2042,6 +2042,19 @@
|
||||
KVM MMU at runtime.
|
||||
Default is 0 (off)
|
||||
|
||||
+ kvm.nx_huge_pages=
|
||||
+ [KVM] Controls the software workaround for the
|
||||
+ X86_BUG_ITLB_MULTIHIT bug.
|
||||
+ force : Always deploy workaround.
|
||||
+ off : Never deploy workaround.
|
||||
+ auto : Deploy workaround based on the presence of
|
||||
+ X86_BUG_ITLB_MULTIHIT.
|
||||
+
|
||||
+ Default is 'auto'.
|
||||
+
|
||||
+ If the software workaround is enabled for the host,
|
||||
+ guests do not need to enable it for nested guests.
|
||||
+
|
||||
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
|
||||
Default is 1 (enabled)
|
||||
|
||||
@@ -2615,6 +2628,12 @@
|
||||
l1tf=off [X86]
|
||||
mds=off [X86]
|
||||
tsx_async_abort=off [X86]
|
||||
+ kvm.nx_huge_pages=off [X86]
|
||||
+
|
||||
+ Exceptions:
|
||||
+ This does not have any effect on
|
||||
+ kvm.nx_huge_pages when
|
||||
+ kvm.nx_huge_pages=force.
|
||||
|
||||
auto (default)
|
||||
Mitigate all CPU vulnerabilities, but leave SMT
|
||||
--- a/arch/x86/include/asm/kvm_host.h
|
||||
+++ b/arch/x86/include/asm/kvm_host.h
|
||||
@@ -321,6 +321,7 @@ struct kvm_mmu_page {
|
||||
struct hlist_node hash_link;
|
||||
bool unsync;
|
||||
bool mmio_cached;
|
||||
+ bool lpage_disallowed; /* Can't be replaced by an equiv large page */
|
||||
|
||||
/*
|
||||
* The following two entries are used to key the shadow page in the
|
||||
@@ -950,6 +951,7 @@ struct kvm_vm_stat {
|
||||
ulong mmu_unsync;
|
||||
ulong remote_tlb_flush;
|
||||
ulong lpages;
|
||||
+ ulong nx_lpage_splits;
|
||||
ulong max_mmu_page_hash_collisions;
|
||||
};
|
||||
|
||||
--- a/arch/x86/kernel/cpu/bugs.c
|
||||
+++ b/arch/x86/kernel/cpu/bugs.c
|
||||
@@ -1257,6 +1257,9 @@ void x86_spec_ctrl_setup_ap(void)
|
||||
x86_amd_ssb_disable();
|
||||
}
|
||||
|
||||
+bool itlb_multihit_kvm_mitigation;
|
||||
+EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
|
||||
+
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "L1TF: " fmt
|
||||
|
||||
@@ -1412,17 +1415,25 @@ static ssize_t l1tf_show_state(char *buf
|
||||
l1tf_vmx_states[l1tf_vmx_mitigation],
|
||||
sched_smt_active() ? "vulnerable" : "disabled");
|
||||
}
|
||||
+
|
||||
+static ssize_t itlb_multihit_show_state(char *buf)
|
||||
+{
|
||||
+ if (itlb_multihit_kvm_mitigation)
|
||||
+ return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
|
||||
+ else
|
||||
+ return sprintf(buf, "KVM: Vulnerable\n");
|
||||
+}
|
||||
#else
|
||||
static ssize_t l1tf_show_state(char *buf)
|
||||
{
|
||||
return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
|
||||
}
|
||||
-#endif
|
||||
|
||||
static ssize_t itlb_multihit_show_state(char *buf)
|
||||
{
|
||||
return sprintf(buf, "Processor vulnerable\n");
|
||||
}
|
||||
+#endif
|
||||
|
||||
static ssize_t mds_show_state(char *buf)
|
||||
{
|
||||
--- a/arch/x86/kvm/mmu.c
|
||||
+++ b/arch/x86/kvm/mmu.c
|
||||
@@ -47,6 +47,20 @@
|
||||
#include <asm/kvm_page_track.h>
|
||||
#include "trace.h"
|
||||
|
||||
+extern bool itlb_multihit_kvm_mitigation;
|
||||
+
|
||||
+static int __read_mostly nx_huge_pages = -1;
|
||||
+
|
||||
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
|
||||
+
|
||||
+static struct kernel_param_ops nx_huge_pages_ops = {
|
||||
+ .set = set_nx_huge_pages,
|
||||
+ .get = param_get_bool,
|
||||
+};
|
||||
+
|
||||
+module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
|
||||
+__MODULE_PARM_TYPE(nx_huge_pages, "bool");
|
||||
+
|
||||
/*
|
||||
* When setting this variable to true it enables Two-Dimensional-Paging
|
||||
* where the hardware walks 2 page tables:
|
||||
@@ -318,6 +332,11 @@ static inline bool spte_ad_enabled(u64 s
|
||||
return !(spte & shadow_acc_track_value);
|
||||
}
|
||||
|
||||
+static bool is_nx_huge_page_enabled(void)
|
||||
+{
|
||||
+ return READ_ONCE(nx_huge_pages);
|
||||
+}
|
||||
+
|
||||
static inline u64 spte_shadow_accessed_mask(u64 spte)
|
||||
{
|
||||
MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
|
||||
@@ -1162,6 +1181,15 @@ static void account_shadowed(struct kvm
|
||||
kvm_mmu_gfn_disallow_lpage(slot, gfn);
|
||||
}
|
||||
|
||||
+static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
+{
|
||||
+ if (sp->lpage_disallowed)
|
||||
+ return;
|
||||
+
|
||||
+ ++kvm->stat.nx_lpage_splits;
|
||||
+ sp->lpage_disallowed = true;
|
||||
+}
|
||||
+
|
||||
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
@@ -1179,6 +1207,12 @@ static void unaccount_shadowed(struct kv
|
||||
kvm_mmu_gfn_allow_lpage(slot, gfn);
|
||||
}
|
||||
|
||||
+static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
|
||||
+{
|
||||
+ --kvm->stat.nx_lpage_splits;
|
||||
+ sp->lpage_disallowed = false;
|
||||
+}
|
||||
+
|
||||
static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
|
||||
struct kvm_memory_slot *slot)
|
||||
{
|
||||
@@ -2753,6 +2787,9 @@ static bool __kvm_mmu_prepare_zap_page(s
|
||||
kvm_reload_remote_mmus(kvm);
|
||||
}
|
||||
|
||||
+ if (sp->lpage_disallowed)
|
||||
+ unaccount_huge_nx_page(kvm, sp);
|
||||
+
|
||||
sp->role.invalid = 1;
|
||||
return list_unstable;
|
||||
}
|
||||
@@ -2972,6 +3009,11 @@ static int set_spte(struct kvm_vcpu *vcp
|
||||
if (!speculative)
|
||||
spte |= spte_shadow_accessed_mask(spte);
|
||||
|
||||
+ if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
|
||||
+ is_nx_huge_page_enabled()) {
|
||||
+ pte_access &= ~ACC_EXEC_MASK;
|
||||
+ }
|
||||
+
|
||||
if (pte_access & ACC_EXEC_MASK)
|
||||
spte |= shadow_x_mask;
|
||||
else
|
||||
@@ -3192,9 +3234,32 @@ static void direct_pte_prefetch(struct k
|
||||
__direct_pte_prefetch(vcpu, sp, sptep);
|
||||
}
|
||||
|
||||
+static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
|
||||
+ gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
|
||||
+{
|
||||
+ int level = *levelp;
|
||||
+ u64 spte = *it.sptep;
|
||||
+
|
||||
+ if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
|
||||
+ is_nx_huge_page_enabled() &&
|
||||
+ is_shadow_present_pte(spte) &&
|
||||
+ !is_large_pte(spte)) {
|
||||
+ /*
|
||||
+ * A small SPTE exists for this pfn, but FNAME(fetch)
|
||||
+ * and __direct_map would like to create a large PTE
|
||||
+ * instead: just force them to go down another level,
|
||||
+ * patching back for them into pfn the next 9 bits of
|
||||
+ * the address.
|
||||
+ */
|
||||
+ u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
|
||||
+ *pfnp |= gfn & page_mask;
|
||||
+ (*levelp)--;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
|
||||
int map_writable, int level, kvm_pfn_t pfn,
|
||||
- bool prefault)
|
||||
+ bool prefault, bool lpage_disallowed)
|
||||
{
|
||||
struct kvm_shadow_walk_iterator it;
|
||||
struct kvm_mmu_page *sp;
|
||||
@@ -3207,6 +3272,12 @@ static int __direct_map(struct kvm_vcpu
|
||||
|
||||
trace_kvm_mmu_spte_requested(gpa, level, pfn);
|
||||
for_each_shadow_entry(vcpu, gpa, it) {
|
||||
+ /*
|
||||
+ * We cannot overwrite existing page tables with an NX
|
||||
+ * large page, as the leaf could be executable.
|
||||
+ */
|
||||
+ disallowed_hugepage_adjust(it, gfn, &pfn, &level);
|
||||
+
|
||||
base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
|
||||
if (it.level == level)
|
||||
break;
|
||||
@@ -3217,6 +3288,8 @@ static int __direct_map(struct kvm_vcpu
|
||||
it.level - 1, true, ACC_ALL);
|
||||
|
||||
link_shadow_page(vcpu, it.sptep, sp);
|
||||
+ if (lpage_disallowed)
|
||||
+ account_huge_nx_page(vcpu->kvm, sp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3508,11 +3581,14 @@ static int nonpaging_map(struct kvm_vcpu
|
||||
{
|
||||
int r;
|
||||
int level;
|
||||
- bool force_pt_level = false;
|
||||
+ bool force_pt_level;
|
||||
kvm_pfn_t pfn;
|
||||
unsigned long mmu_seq;
|
||||
bool map_writable, write = error_code & PFERR_WRITE_MASK;
|
||||
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
|
||||
+ is_nx_huge_page_enabled();
|
||||
|
||||
+ force_pt_level = lpage_disallowed;
|
||||
level = mapping_level(vcpu, gfn, &force_pt_level);
|
||||
if (likely(!force_pt_level)) {
|
||||
/*
|
||||
@@ -3546,7 +3622,8 @@ static int nonpaging_map(struct kvm_vcpu
|
||||
goto out_unlock;
|
||||
if (likely(!force_pt_level))
|
||||
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
|
||||
- r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
|
||||
+ r = __direct_map(vcpu, v, write, map_writable, level, pfn,
|
||||
+ prefault, false);
|
||||
out_unlock:
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
@@ -4132,6 +4209,8 @@ static int tdp_page_fault(struct kvm_vcp
|
||||
unsigned long mmu_seq;
|
||||
int write = error_code & PFERR_WRITE_MASK;
|
||||
bool map_writable;
|
||||
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
|
||||
+ is_nx_huge_page_enabled();
|
||||
|
||||
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
|
||||
|
||||
@@ -4142,8 +4221,9 @@ static int tdp_page_fault(struct kvm_vcp
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
- force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
|
||||
- PT_DIRECTORY_LEVEL);
|
||||
+ force_pt_level =
|
||||
+ lpage_disallowed ||
|
||||
+ !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
|
||||
level = mapping_level(vcpu, gfn, &force_pt_level);
|
||||
if (likely(!force_pt_level)) {
|
||||
if (level > PT_DIRECTORY_LEVEL &&
|
||||
@@ -4172,7 +4252,8 @@ static int tdp_page_fault(struct kvm_vcp
|
||||
goto out_unlock;
|
||||
if (likely(!force_pt_level))
|
||||
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
|
||||
- r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
|
||||
+ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
|
||||
+ prefault, lpage_disallowed);
|
||||
out_unlock:
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
kvm_release_pfn_clean(pfn);
|
||||
@@ -6099,10 +6180,58 @@ static void kvm_set_mmio_spte_mask(void)
|
||||
kvm_mmu_set_mmio_spte_mask(mask, mask);
|
||||
}
|
||||
|
||||
+static bool get_nx_auto_mode(void)
|
||||
+{
|
||||
+ /* Return true when CPU has the bug, and mitigations are ON */
|
||||
+ return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
|
||||
+}
|
||||
+
|
||||
+static void __set_nx_huge_pages(bool val)
|
||||
+{
|
||||
+ nx_huge_pages = itlb_multihit_kvm_mitigation = val;
|
||||
+}
|
||||
+
|
||||
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
|
||||
+{
|
||||
+ bool old_val = nx_huge_pages;
|
||||
+ bool new_val;
|
||||
+
|
||||
+ /* In "auto" mode deploy workaround only if CPU has the bug. */
|
||||
+ if (sysfs_streq(val, "off"))
|
||||
+ new_val = 0;
|
||||
+ else if (sysfs_streq(val, "force"))
|
||||
+ new_val = 1;
|
||||
+ else if (sysfs_streq(val, "auto"))
|
||||
+ new_val = get_nx_auto_mode();
|
||||
+ else if (strtobool(val, &new_val) < 0)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ __set_nx_huge_pages(new_val);
|
||||
+
|
||||
+ if (new_val != old_val) {
|
||||
+ struct kvm *kvm;
|
||||
+ int idx;
|
||||
+
|
||||
+ mutex_lock(&kvm_lock);
|
||||
+
|
||||
+ list_for_each_entry(kvm, &vm_list, vm_list) {
|
||||
+ idx = srcu_read_lock(&kvm->srcu);
|
||||
+ kvm_mmu_zap_all_fast(kvm);
|
||||
+ srcu_read_unlock(&kvm->srcu, idx);
|
||||
+ }
|
||||
+ mutex_unlock(&kvm_lock);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
int kvm_mmu_module_init(void)
|
||||
{
|
||||
int ret = -ENOMEM;
|
||||
|
||||
+ if (nx_huge_pages == -1)
|
||||
+ __set_nx_huge_pages(get_nx_auto_mode());
|
||||
+
|
||||
/*
|
||||
* MMU roles use union aliasing which is, generally speaking, an
|
||||
* undefined behavior. However, we supposedly know how compilers behave
|
||||
--- a/arch/x86/kvm/paging_tmpl.h
|
||||
+++ b/arch/x86/kvm/paging_tmpl.h
|
||||
@@ -614,13 +614,14 @@ static void FNAME(pte_prefetch)(struct k
|
||||
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
|
||||
struct guest_walker *gw,
|
||||
int write_fault, int hlevel,
|
||||
- kvm_pfn_t pfn, bool map_writable, bool prefault)
|
||||
+ kvm_pfn_t pfn, bool map_writable, bool prefault,
|
||||
+ bool lpage_disallowed)
|
||||
{
|
||||
struct kvm_mmu_page *sp = NULL;
|
||||
struct kvm_shadow_walk_iterator it;
|
||||
unsigned direct_access, access = gw->pt_access;
|
||||
int top_level, ret;
|
||||
- gfn_t base_gfn;
|
||||
+ gfn_t gfn, base_gfn;
|
||||
|
||||
direct_access = gw->pte_access;
|
||||
|
||||
@@ -665,13 +666,25 @@ static int FNAME(fetch)(struct kvm_vcpu
|
||||
link_shadow_page(vcpu, it.sptep, sp);
|
||||
}
|
||||
|
||||
- base_gfn = gw->gfn;
|
||||
+ /*
|
||||
+ * FNAME(page_fault) might have clobbered the bottom bits of
|
||||
+ * gw->gfn, restore them from the virtual address.
|
||||
+ */
|
||||
+ gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
|
||||
+ base_gfn = gfn;
|
||||
|
||||
trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
|
||||
|
||||
for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
|
||||
clear_sp_write_flooding_count(it.sptep);
|
||||
- base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
|
||||
+
|
||||
+ /*
|
||||
+ * We cannot overwrite existing page tables with an NX
|
||||
+ * large page, as the leaf could be executable.
|
||||
+ */
|
||||
+ disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
|
||||
+
|
||||
+ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
|
||||
if (it.level == hlevel)
|
||||
break;
|
||||
|
||||
@@ -683,6 +696,8 @@ static int FNAME(fetch)(struct kvm_vcpu
|
||||
sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
|
||||
it.level - 1, true, direct_access);
|
||||
link_shadow_page(vcpu, it.sptep, sp);
|
||||
+ if (lpage_disallowed)
|
||||
+ account_huge_nx_page(vcpu->kvm, sp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -759,9 +774,11 @@ static int FNAME(page_fault)(struct kvm_
|
||||
int r;
|
||||
kvm_pfn_t pfn;
|
||||
int level = PT_PAGE_TABLE_LEVEL;
|
||||
- bool force_pt_level = false;
|
||||
unsigned long mmu_seq;
|
||||
bool map_writable, is_self_change_mapping;
|
||||
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
|
||||
+ is_nx_huge_page_enabled();
|
||||
+ bool force_pt_level = lpage_disallowed;
|
||||
|
||||
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
|
||||
|
||||
@@ -851,7 +868,7 @@ static int FNAME(page_fault)(struct kvm_
|
||||
if (!force_pt_level)
|
||||
transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
|
||||
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
|
||||
- level, pfn, map_writable, prefault);
|
||||
+ level, pfn, map_writable, prefault, lpage_disallowed);
|
||||
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
|
||||
|
||||
out_unlock:
|
||||
--- a/arch/x86/kvm/x86.c
|
||||
+++ b/arch/x86/kvm/x86.c
|
||||
@@ -213,6 +213,7 @@ struct kvm_stats_debugfs_item debugfs_en
|
||||
{ "mmu_unsync", VM_STAT(mmu_unsync) },
|
||||
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
|
||||
{ "largepages", VM_STAT(lpages, .mode = 0444) },
|
||||
+ { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
|
||||
{ "max_mmu_page_hash_collisions",
|
||||
VM_STAT(max_mmu_page_hash_collisions) },
|
||||
{ NULL }
|
||||
@@ -1256,6 +1257,14 @@ static u64 kvm_get_arch_capabilities(voi
|
||||
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
|
||||
|
||||
/*
|
||||
+ * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
|
||||
+ * the nested hypervisor runs with NX huge pages. If it is not,
|
||||
+ * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
|
||||
+ * L1 guests, so it need not worry about its own (L2) guests.
|
||||
+ */
|
||||
+ data |= ARCH_CAP_PSCHANGE_MC_NO;
|
||||
+
|
||||
+ /*
|
||||
* If we're doing cache flushes (either "always" or "cond")
|
||||
* we will do one whenever the guest does a vmlaunch/vmresume.
|
||||
* If an outer hypervisor is doing the cache flush for us
|
||||
@@ -0,0 +1,128 @@
|
||||
From: Junaid Shahid <junaids@google.com>
|
||||
Date: Mon, 4 Nov 2019 12:22:02 +0100
|
||||
Subject: [5/7] kvm: Add helper function for creating VM worker threads
|
||||
Origin: https://git.kernel.org/linus/c57c80467f90e5504c8df9ad3555d2c78800bf94
|
||||
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2018-12207
|
||||
|
||||
Add a function to create a kernel thread associated with a given VM. In
|
||||
particular, it ensures that the worker thread inherits the priority and
|
||||
cgroups of the calling thread.
|
||||
|
||||
Signed-off-by: Junaid Shahid <junaids@google.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/kvm_host.h | 6 +++
|
||||
virt/kvm/kvm_main.c | 84 ++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 90 insertions(+)
|
||||
|
||||
--- a/include/linux/kvm_host.h
|
||||
+++ b/include/linux/kvm_host.h
|
||||
@@ -1382,4 +1382,10 @@ static inline int kvm_arch_vcpu_run_pid_
|
||||
}
|
||||
#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
|
||||
|
||||
+typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
|
||||
+
|
||||
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
|
||||
+ uintptr_t data, const char *name,
|
||||
+ struct task_struct **thread_ptr);
|
||||
+
|
||||
#endif
|
||||
--- a/virt/kvm/kvm_main.c
|
||||
+++ b/virt/kvm/kvm_main.c
|
||||
@@ -50,6 +50,7 @@
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/lockdep.h>
|
||||
+#include <linux/kthread.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/ioctl.h>
|
||||
@@ -4365,3 +4366,86 @@ void kvm_exit(void)
|
||||
kvm_vfio_ops_exit();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_exit);
|
||||
+
|
||||
+struct kvm_vm_worker_thread_context {
|
||||
+ struct kvm *kvm;
|
||||
+ struct task_struct *parent;
|
||||
+ struct completion init_done;
|
||||
+ kvm_vm_thread_fn_t thread_fn;
|
||||
+ uintptr_t data;
|
||||
+ int err;
|
||||
+};
|
||||
+
|
||||
+static int kvm_vm_worker_thread(void *context)
|
||||
+{
|
||||
+ /*
|
||||
+ * The init_context is allocated on the stack of the parent thread, so
|
||||
+ * we have to locally copy anything that is needed beyond initialization
|
||||
+ */
|
||||
+ struct kvm_vm_worker_thread_context *init_context = context;
|
||||
+ struct kvm *kvm = init_context->kvm;
|
||||
+ kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
|
||||
+ uintptr_t data = init_context->data;
|
||||
+ int err;
|
||||
+
|
||||
+ err = kthread_park(current);
|
||||
+ /* kthread_park(current) is never supposed to return an error */
|
||||
+ WARN_ON(err != 0);
|
||||
+ if (err)
|
||||
+ goto init_complete;
|
||||
+
|
||||
+ err = cgroup_attach_task_all(init_context->parent, current);
|
||||
+ if (err) {
|
||||
+ kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
|
||||
+ __func__, err);
|
||||
+ goto init_complete;
|
||||
+ }
|
||||
+
|
||||
+ set_user_nice(current, task_nice(init_context->parent));
|
||||
+
|
||||
+init_complete:
|
||||
+ init_context->err = err;
|
||||
+ complete(&init_context->init_done);
|
||||
+ init_context = NULL;
|
||||
+
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ /* Wait to be woken up by the spawner before proceeding. */
|
||||
+ kthread_parkme();
|
||||
+
|
||||
+ if (!kthread_should_stop())
|
||||
+ err = thread_fn(kvm, data);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
|
||||
+ uintptr_t data, const char *name,
|
||||
+ struct task_struct **thread_ptr)
|
||||
+{
|
||||
+ struct kvm_vm_worker_thread_context init_context = {};
|
||||
+ struct task_struct *thread;
|
||||
+
|
||||
+ *thread_ptr = NULL;
|
||||
+ init_context.kvm = kvm;
|
||||
+ init_context.parent = current;
|
||||
+ init_context.thread_fn = thread_fn;
|
||||
+ init_context.data = data;
|
||||
+ init_completion(&init_context.init_done);
|
||||
+
|
||||
+ thread = kthread_run(kvm_vm_worker_thread, &init_context,
|
||||
+ "%s-%d", name, task_pid_nr(current));
|
||||
+ if (IS_ERR(thread))
|
||||
+ return PTR_ERR(thread);
|
||||
+
|
||||
+ /* kthread_run is never supposed to return NULL */
|
||||
+ WARN_ON(thread == NULL);
|
||||
+
|
||||
+ wait_for_completion(&init_context.init_done);
|
||||
+
|
||||
+ if (!init_context.err)
|
||||
+ *thread_ptr = thread;
|
||||
+
|
||||
+ return init_context.err;
|
||||
+}
|
||||
360
debian/patches/bugfix/x86/itlb_multihit/0006-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch
vendored
Normal file
360
debian/patches/bugfix/x86/itlb_multihit/0006-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch
vendored
Normal file
@@ -0,0 +1,360 @@
|
||||
From: Junaid Shahid <junaids@google.com>
|
||||
Date: Mon, 4 Nov 2019 20:26:00 +0100
|
||||
Subject: [6/7] kvm: x86: mmu: Recovery of shattered NX large pages
|
||||
Origin: https://git.kernel.org/linus/1aa9b9572b10529c2e64e2b8f44025d86e124308
|
||||
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2018-12207
|
||||
|
||||
The page table pages corresponding to broken down large pages are zapped in
|
||||
FIFO order, so that the large page can potentially be recovered, if it is
|
||||
no longer being used for execution. This removes the performance penalty
|
||||
for walking deeper EPT page tables.
|
||||
|
||||
By default, one large page will last about one hour once the guest
|
||||
reaches a steady state.
|
||||
|
||||
Signed-off-by: Junaid Shahid <junaids@google.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[bwh: Backported to 5.3:
|
||||
- Update another error path in kvm_create_vm() to use out_err_no_mmu_notifier
|
||||
- Adjust context]
|
||||
---
|
||||
.../admin-guide/kernel-parameters.txt | 6 +
|
||||
arch/x86/include/asm/kvm_host.h | 4 +
|
||||
arch/x86/kvm/mmu.c | 129 ++++++++++++++++++
|
||||
arch/x86/kvm/mmu.h | 4 +
|
||||
arch/x86/kvm/x86.c | 11 ++
|
||||
virt/kvm/kvm_main.c | 28 ++++
|
||||
6 files changed, 182 insertions(+)
|
||||
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -2055,6 +2055,12 @@
|
||||
If the software workaround is enabled for the host,
|
||||
guests do need not to enable it for nested guests.
|
||||
|
||||
+ kvm.nx_huge_pages_recovery_ratio=
|
||||
+ [KVM] Controls how many 4KiB pages are periodically zapped
|
||||
+ back to huge pages. 0 disables the recovery, otherwise if
|
||||
+ the value is N KVM will zap 1/Nth of the 4KiB pages every
|
||||
+ minute. The default is 60.
|
||||
+
|
||||
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
|
||||
Default is 1 (enabled)
|
||||
|
||||
--- a/arch/x86/include/asm/kvm_host.h
|
||||
+++ b/arch/x86/include/asm/kvm_host.h
|
||||
@@ -319,6 +319,8 @@ struct kvm_rmap_head {
|
||||
struct kvm_mmu_page {
|
||||
struct list_head link;
|
||||
struct hlist_node hash_link;
|
||||
+ struct list_head lpage_disallowed_link;
|
||||
+
|
||||
bool unsync;
|
||||
bool mmio_cached;
|
||||
bool lpage_disallowed; /* Can't be replaced by an equiv large page */
|
||||
@@ -864,6 +866,7 @@ struct kvm_arch {
|
||||
* Hash table of struct kvm_mmu_page.
|
||||
*/
|
||||
struct list_head active_mmu_pages;
|
||||
+ struct list_head lpage_disallowed_mmu_pages;
|
||||
struct kvm_page_track_notifier_node mmu_sp_tracker;
|
||||
struct kvm_page_track_notifier_head track_notifier_head;
|
||||
|
||||
@@ -938,6 +941,7 @@ struct kvm_arch {
|
||||
bool exception_payload_enabled;
|
||||
|
||||
struct kvm_pmu_event_filter *pmu_event_filter;
|
||||
+ struct task_struct *nx_lpage_recovery_thread;
|
||||
};
|
||||
|
||||
struct kvm_vm_stat {
|
||||
--- a/arch/x86/kvm/mmu.c
|
||||
+++ b/arch/x86/kvm/mmu.c
|
||||
@@ -37,6 +37,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/kern_levels.h>
|
||||
+#include <linux/kthread.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/pat.h>
|
||||
@@ -50,16 +51,26 @@
|
||||
extern bool itlb_multihit_kvm_mitigation;
|
||||
|
||||
static int __read_mostly nx_huge_pages = -1;
|
||||
+static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
|
||||
|
||||
static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
|
||||
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
|
||||
|
||||
static struct kernel_param_ops nx_huge_pages_ops = {
|
||||
.set = set_nx_huge_pages,
|
||||
.get = param_get_bool,
|
||||
};
|
||||
|
||||
+static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
|
||||
+ .set = set_nx_huge_pages_recovery_ratio,
|
||||
+ .get = param_get_uint,
|
||||
+};
|
||||
+
|
||||
module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
|
||||
__MODULE_PARM_TYPE(nx_huge_pages, "bool");
|
||||
+module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
|
||||
+ &nx_huge_pages_recovery_ratio, 0644);
|
||||
+__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
|
||||
|
||||
/*
|
||||
* When setting this variable to true it enables Two-Dimensional-Paging
|
||||
@@ -1187,6 +1198,8 @@ static void account_huge_nx_page(struct
|
||||
return;
|
||||
|
||||
++kvm->stat.nx_lpage_splits;
|
||||
+ list_add_tail(&sp->lpage_disallowed_link,
|
||||
+ &kvm->arch.lpage_disallowed_mmu_pages);
|
||||
sp->lpage_disallowed = true;
|
||||
}
|
||||
|
||||
@@ -1211,6 +1224,7 @@ static void unaccount_huge_nx_page(struc
|
||||
{
|
||||
--kvm->stat.nx_lpage_splits;
|
||||
sp->lpage_disallowed = false;
|
||||
+ list_del(&sp->lpage_disallowed_link);
|
||||
}
|
||||
|
||||
static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
|
||||
@@ -6218,6 +6232,8 @@ static int set_nx_huge_pages(const char
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
kvm_mmu_zap_all_fast(kvm);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
+
|
||||
+ wake_up_process(kvm->arch.nx_lpage_recovery_thread);
|
||||
}
|
||||
mutex_unlock(&kvm_lock);
|
||||
}
|
||||
@@ -6311,3 +6327,116 @@ void kvm_mmu_module_exit(void)
|
||||
unregister_shrinker(&mmu_shrinker);
|
||||
mmu_audit_disable();
|
||||
}
|
||||
+
|
||||
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
|
||||
+{
|
||||
+ unsigned int old_val;
|
||||
+ int err;
|
||||
+
|
||||
+ old_val = nx_huge_pages_recovery_ratio;
|
||||
+ err = param_set_uint(val, kp);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ if (READ_ONCE(nx_huge_pages) &&
|
||||
+ !old_val && nx_huge_pages_recovery_ratio) {
|
||||
+ struct kvm *kvm;
|
||||
+
|
||||
+ mutex_lock(&kvm_lock);
|
||||
+
|
||||
+ list_for_each_entry(kvm, &vm_list, vm_list)
|
||||
+ wake_up_process(kvm->arch.nx_lpage_recovery_thread);
|
||||
+
|
||||
+ mutex_unlock(&kvm_lock);
|
||||
+ }
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static void kvm_recover_nx_lpages(struct kvm *kvm)
|
||||
+{
|
||||
+ int rcu_idx;
|
||||
+ struct kvm_mmu_page *sp;
|
||||
+ unsigned int ratio;
|
||||
+ LIST_HEAD(invalid_list);
|
||||
+ ulong to_zap;
|
||||
+
|
||||
+ rcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
+ spin_lock(&kvm->mmu_lock);
|
||||
+
|
||||
+ ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
|
||||
+ to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
|
||||
+ while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
|
||||
+ /*
|
||||
+ * We use a separate list instead of just using active_mmu_pages
|
||||
+ * because the number of lpage_disallowed pages is expected to
|
||||
+ * be relatively small compared to the total.
|
||||
+ */
|
||||
+ sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
|
||||
+ struct kvm_mmu_page,
|
||||
+ lpage_disallowed_link);
|
||||
+ WARN_ON_ONCE(!sp->lpage_disallowed);
|
||||
+ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
|
||||
+ WARN_ON_ONCE(sp->lpage_disallowed);
|
||||
+
|
||||
+ if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
|
||||
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
|
||||
+ if (to_zap)
|
||||
+ cond_resched_lock(&kvm->mmu_lock);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ spin_unlock(&kvm->mmu_lock);
|
||||
+ srcu_read_unlock(&kvm->srcu, rcu_idx);
|
||||
+}
|
||||
+
|
||||
+static long get_nx_lpage_recovery_timeout(u64 start_time)
|
||||
+{
|
||||
+ return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
|
||||
+ ? start_time + 60 * HZ - get_jiffies_64()
|
||||
+ : MAX_SCHEDULE_TIMEOUT;
|
||||
+}
|
||||
+
|
||||
+static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
|
||||
+{
|
||||
+ u64 start_time;
|
||||
+ long remaining_time;
|
||||
+
|
||||
+ while (true) {
|
||||
+ start_time = get_jiffies_64();
|
||||
+ remaining_time = get_nx_lpage_recovery_timeout(start_time);
|
||||
+
|
||||
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||
+ while (!kthread_should_stop() && remaining_time > 0) {
|
||||
+ schedule_timeout(remaining_time);
|
||||
+ remaining_time = get_nx_lpage_recovery_timeout(start_time);
|
||||
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||
+ }
|
||||
+
|
||||
+ set_current_state(TASK_RUNNING);
|
||||
+
|
||||
+ if (kthread_should_stop())
|
||||
+ return 0;
|
||||
+
|
||||
+ kvm_recover_nx_lpages(kvm);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int kvm_mmu_post_init_vm(struct kvm *kvm)
|
||||
+{
|
||||
+ int err;
|
||||
+
|
||||
+ err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
|
||||
+ "kvm-nx-lpage-recovery",
|
||||
+ &kvm->arch.nx_lpage_recovery_thread);
|
||||
+ if (!err)
|
||||
+ kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
|
||||
+{
|
||||
+ if (kvm->arch.nx_lpage_recovery_thread)
|
||||
+ kthread_stop(kvm->arch.nx_lpage_recovery_thread);
|
||||
+}
|
||||
--- a/arch/x86/kvm/mmu.h
|
||||
+++ b/arch/x86/kvm/mmu.h
|
||||
@@ -210,4 +210,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_
|
||||
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot, u64 gfn);
|
||||
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
|
||||
+
|
||||
+int kvm_mmu_post_init_vm(struct kvm *kvm);
|
||||
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
|
||||
+
|
||||
#endif
|
||||
--- a/arch/x86/kvm/x86.c
|
||||
+++ b/arch/x86/kvm/x86.c
|
||||
@@ -9342,6 +9342,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un
|
||||
|
||||
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
|
||||
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
|
||||
+ INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
|
||||
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
|
||||
atomic_set(&kvm->arch.noncoherent_dma_count, 0);
|
||||
|
||||
@@ -9373,6 +9374,11 @@ int kvm_arch_init_vm(struct kvm *kvm, un
|
||||
return 0;
|
||||
}
|
||||
|
||||
+int kvm_arch_post_init_vm(struct kvm *kvm)
|
||||
+{
|
||||
+ return kvm_mmu_post_init_vm(kvm);
|
||||
+}
|
||||
+
|
||||
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu_load(vcpu);
|
||||
@@ -9474,6 +9480,11 @@ int x86_set_memory_region(struct kvm *kv
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(x86_set_memory_region);
|
||||
|
||||
+void kvm_arch_pre_destroy_vm(struct kvm *kvm)
|
||||
+{
|
||||
+ kvm_mmu_pre_destroy_vm(kvm);
|
||||
+}
|
||||
+
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
if (current->mm == kvm->mm) {
|
||||
--- a/virt/kvm/kvm_main.c
|
||||
+++ b/virt/kvm/kvm_main.c
|
||||
@@ -626,6 +626,23 @@ static int kvm_create_vm_debugfs(struct
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Called after the VM is otherwise initialized, but just before adding it to
|
||||
+ * the vm_list.
|
||||
+ */
|
||||
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Called just after removing the VM from the vm_list, but before doing any
|
||||
+ * other destruction.
|
||||
+ */
|
||||
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
static struct kvm *kvm_create_vm(unsigned long type)
|
||||
{
|
||||
int r, i;
|
||||
@@ -676,11 +693,15 @@ static struct kvm *kvm_create_vm(unsigne
|
||||
rcu_assign_pointer(kvm->buses[i],
|
||||
kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
|
||||
if (!kvm->buses[i])
|
||||
- goto out_err;
|
||||
+ goto out_err_no_mmu_notifier;
|
||||
}
|
||||
|
||||
r = kvm_init_mmu_notifier(kvm);
|
||||
if (r)
|
||||
+ goto out_err_no_mmu_notifier;
|
||||
+
|
||||
+ r = kvm_arch_post_init_vm(kvm);
|
||||
+ if (r)
|
||||
goto out_err;
|
||||
|
||||
mutex_lock(&kvm_lock);
|
||||
@@ -692,6 +713,11 @@ static struct kvm *kvm_create_vm(unsigne
|
||||
return kvm;
|
||||
|
||||
out_err:
|
||||
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
|
||||
+ if (kvm->mmu_notifier.ops)
|
||||
+ mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
|
||||
+#endif
|
||||
+out_err_no_mmu_notifier:
|
||||
cleanup_srcu_struct(&kvm->irq_srcu);
|
||||
out_err_no_irq_srcu:
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
@@ -734,6 +760,8 @@ static void kvm_destroy_vm(struct kvm *k
|
||||
mutex_lock(&kvm_lock);
|
||||
list_del(&kvm->vm_list);
|
||||
mutex_unlock(&kvm_lock);
|
||||
+ kvm_arch_pre_destroy_vm(kvm);
|
||||
+
|
||||
kvm_free_irq_routing(kvm);
|
||||
for (i = 0; i < KVM_NR_BUSES; i++) {
|
||||
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
|
||||
193
debian/patches/bugfix/x86/itlb_multihit/0007-Documentation-Add-ITLB_MULTIHIT-documentation.patch
vendored
Normal file
193
debian/patches/bugfix/x86/itlb_multihit/0007-Documentation-Add-ITLB_MULTIHIT-documentation.patch
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
From: "Gomez Iglesias, Antonio" <antonio.gomez.iglesias@intel.com>
|
||||
Date: Mon, 4 Nov 2019 20:26:00 +0100
|
||||
Subject: [7/7] Documentation: Add ITLB_MULTIHIT documentation
|
||||
Origin: https://git.kernel.org/linus/7f00cc8d4a51074eb0ad4c3f16c15757b1ddfb7d
|
||||
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2018-12207
|
||||
|
||||
Add the initial ITLB_MULTIHIT documentation.
|
||||
|
||||
[ tglx: Add it to the index so it gets actually built. ]
|
||||
|
||||
Signed-off-by: Antonio Gomez Iglesias <antonio.gomez.iglesias@intel.com>
|
||||
Signed-off-by: Nelson D'Souza <nelson.dsouza@linux.intel.com>
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
Documentation/admin-guide/hw-vuln/index.rst | 1 +
|
||||
.../admin-guide/hw-vuln/multihit.rst | 163 ++++++++++++++++++
|
||||
2 files changed, 164 insertions(+)
|
||||
create mode 100644 Documentation/admin-guide/hw-vuln/multihit.rst
|
||||
|
||||
--- a/Documentation/admin-guide/hw-vuln/index.rst
|
||||
+++ b/Documentation/admin-guide/hw-vuln/index.rst
|
||||
@@ -13,3 +13,4 @@ are configurable at compile, boot or run
|
||||
l1tf
|
||||
mds
|
||||
tsx_async_abort
|
||||
+ multihit.rst
|
||||
--- /dev/null
|
||||
+++ b/Documentation/admin-guide/hw-vuln/multihit.rst
|
||||
@@ -0,0 +1,163 @@
|
||||
+iTLB multihit
|
||||
+=============
|
||||
+
|
||||
+iTLB multihit is an erratum where some processors may incur a machine check
|
||||
+error, possibly resulting in an unrecoverable CPU lockup, when an
|
||||
+instruction fetch hits multiple entries in the instruction TLB. This can
|
||||
+occur when the page size is changed along with either the physical address
|
||||
+or cache type. A malicious guest running on a virtualized system can
|
||||
+exploit this erratum to perform a denial of service attack.
|
||||
+
|
||||
+
|
||||
+Affected processors
|
||||
+-------------------
|
||||
+
|
||||
+Variations of this erratum are present on most Intel Core and Xeon processor
|
||||
+models. The erratum is not present on:
|
||||
+
|
||||
+ - non-Intel processors
|
||||
+
|
||||
+ - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
|
||||
+
|
||||
+ - Intel processors that have the PSCHANGE_MC_NO bit set in the
|
||||
+ IA32_ARCH_CAPABILITIES MSR.
|
||||
+
|
||||
+
|
||||
+Related CVEs
|
||||
+------------
|
||||
+
|
||||
+The following CVE entry is related to this issue:
|
||||
+
|
||||
+ ============== =================================================
|
||||
+ CVE-2018-12207 Machine Check Error Avoidance on Page Size Change
|
||||
+ ============== =================================================
|
||||
+
|
||||
+
|
||||
+Problem
|
||||
+-------
|
||||
+
|
||||
+Privileged software, including OS and virtual machine managers (VMM), is in
|
||||
+charge of memory management. A key component in memory management is the control
|
||||
+of the page tables. Modern processors use virtual memory, a technique that creates
|
||||
+the illusion of a very large memory for processors. This virtual space is split
|
||||
+into pages of a given size. Page tables translate virtual addresses to physical
|
||||
+addresses.
|
||||
+
|
||||
+To reduce latency when performing a virtual to physical address translation,
|
||||
+processors include a structure, called TLB, that caches recent translations.
|
||||
+There are separate TLBs for instruction (iTLB) and data (dTLB).
|
||||
+
|
||||
+Under this erratum, instructions are fetched from a linear address translated
|
||||
+using a 4 KB translation cached in the iTLB. Privileged software modifies the
|
||||
+paging structure so that the same linear address uses a large page size (2 MB, 4
|
||||
+MB, 1 GB) with a different physical address or memory type. After the page
|
||||
+structure modification but before the software invalidates any iTLB entries for
|
||||
+the linear address, a code fetch that happens on the same linear address may
|
||||
+cause a machine-check error which can result in a system hang or shutdown.
|
||||
+
|
||||
+
|
||||
+Attack scenarios
|
||||
+----------------
|
||||
+
|
||||
+Attacks against the iTLB multihit erratum can be mounted from malicious
|
||||
+guests in a virtualized system.
|
||||
+
|
||||
+
|
||||
+iTLB multihit system information
|
||||
+--------------------------------
|
||||
+
|
||||
+The Linux kernel provides a sysfs interface to enumerate the current iTLB
|
||||
+multihit status of the system: whether the system is vulnerable and which
|
||||
+mitigations are active. The relevant sysfs file is:
|
||||
+
|
||||
+/sys/devices/system/cpu/vulnerabilities/itlb_multihit
|
||||
+
|
||||
+The possible values in this file are:
|
||||
+
|
||||
+.. list-table::
|
||||
+
|
||||
+ * - Not affected
|
||||
+ - The processor is not vulnerable.
|
||||
+ * - KVM: Mitigation: Split huge pages
|
||||
+ - Software changes mitigate this issue.
|
||||
+ * - KVM: Vulnerable
|
||||
+ - The processor is vulnerable, but no mitigation is enabled.
|
||||
+
|
||||
+
|
||||
+Enumeration of the erratum
|
||||
+--------------------------------
|
||||
+
|
||||
+A new bit has been allocated in the IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) MSR
|
||||
+and will be set on CPUs that are mitigated against this issue.
|
||||
+
|
||||
+ ======================================= =========== ===============================
|
||||
+ IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable,check model
|
||||
+ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable,check model
|
||||
+ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable
|
||||
+ ======================================= =========== ===============================
|
||||
+
|
||||
+
|
||||
+Mitigation mechanism
|
||||
+-------------------------
|
||||
+
|
||||
+This erratum can be mitigated by restricting the use of large page sizes to
|
||||
+non-executable pages. This forces all iTLB entries to be 4K, and removes
|
||||
+the possibility of multiple hits.
|
||||
+
|
||||
+In order to mitigate the vulnerability, KVM initially marks all huge pages
|
||||
+as non-executable. If the guest attempts to execute in one of those pages,
|
||||
+the page is broken down into 4K pages, which are then marked executable.
|
||||
+
|
||||
+If EPT is disabled or not available on the host, KVM is in control of TLB
|
||||
+flushes and the problematic situation cannot happen. However, the shadow
|
||||
+EPT paging mechanism used by nested virtualization is vulnerable, because
|
||||
+the nested guest can trigger multiple iTLB hits by modifying its own
|
||||
+(non-nested) page tables. For simplicity, KVM will make large pages
|
||||
+non-executable in all shadow paging modes.
|
||||
+
|
||||
+Mitigation control on the kernel command line and KVM - module parameter
|
||||
+------------------------------------------------------------------------
|
||||
+
|
||||
+The KVM hypervisor mitigation mechanism for marking huge pages as
|
||||
+non-executable can be controlled with a module parameter "nx_huge_pages=".
|
||||
+The kernel command line allows controlling the iTLB multihit mitigations at
|
||||
+boot time with the option "kvm.nx_huge_pages=".
|
||||
+
|
||||
+The valid arguments for these options are:
|
||||
+
|
||||
+ ========== ================================================================
|
||||
+ force Mitigation is enabled. In this case, the mitigation implements
|
||||
+ non-executable huge pages in Linux kernel KVM module. All huge
|
||||
+ pages in the EPT are marked as non-executable.
|
||||
+ If a guest attempts to execute in one of those pages, the page is
|
||||
+ broken down into 4K pages, which are then marked executable.
|
||||
+
|
||||
+ off Mitigation is disabled.
|
||||
+
|
||||
+ auto Enable mitigation only if the platform is affected and the kernel
|
||||
+ was not booted with the "mitigations=off" command line parameter.
|
||||
+ This is the default option.
|
||||
+ ========== ================================================================
|
||||
+
|
||||
+
|
||||
+Mitigation selection guide
|
||||
+--------------------------
|
||||
+
|
||||
+1. No virtualization in use
|
||||
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
+
|
||||
+ The system is protected by the kernel unconditionally and no further
|
||||
+ action is required.
|
||||
+
|
||||
+2. Virtualization with trusted guests
|
||||
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
+
|
||||
+ If the guest comes from a trusted source, you may assume that the guest will
|
||||
+ not attempt to maliciously exploit these errata and no further action is
|
||||
+ required.
|
||||
+
|
||||
+3. Virtualization with untrusted guests
|
||||
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
+ If the guest comes from an untrusted source, the host kernel will need
|
||||
+ to apply iTLB multihit mitigation via the kernel command line or kvm
|
||||
+ module parameter.
|
||||
9
debian/patches/series
vendored
9
debian/patches/series
vendored
@@ -153,6 +153,15 @@ bugfix/x86/taa/0006-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch
|
||||
bugfix/x86/taa/0007-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch
|
||||
bugfix/x86/taa/0008-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch
|
||||
bugfix/x86/taa/0009-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch
|
||||
bugfix/x86/itlb_multihit/0001-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch
|
||||
bugfix/x86/itlb_multihit/0001-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch
|
||||
bugfix/x86/itlb_multihit/0001-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch
|
||||
bugfix/x86/itlb_multihit/0002-x86-cpu-Add-Tremont-to-the-cpu-vulnerability-whiteli.patch
|
||||
bugfix/x86/itlb_multihit/0003-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch
|
||||
bugfix/x86/itlb_multihit/0004-kvm-mmu-ITLB_MULTIHIT-mitigation.patch
|
||||
bugfix/x86/itlb_multihit/0005-kvm-Add-helper-function-for-creating-VM-worker-threa.patch
|
||||
bugfix/x86/itlb_multihit/0006-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch
|
||||
bugfix/x86/itlb_multihit/0007-Documentation-Add-ITLB_MULTIHIT-documentation.patch
|
||||
|
||||
# Fix exported symbol versions
|
||||
bugfix/all/module-disable-matching-missing-version-crc.patch
|
||||
|
||||
Reference in New Issue
Block a user